diff --git a/.buildinfo b/.buildinfo new file mode 100644 index 000000000..6f2a23e57 --- /dev/null +++ b/.buildinfo @@ -0,0 +1,4 @@ +# Sphinx build info version 1 +# This file records the configuration used when building these files. When it is not found, a full rebuild will be done. +config: 0112ff129e9f201f32787e3a1d90daab +tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/.doctrees/api-processbuilder.doctree b/.doctrees/api-processbuilder.doctree new file mode 100644 index 000000000..7760bb97e Binary files /dev/null and b/.doctrees/api-processbuilder.doctree differ diff --git a/.doctrees/api-processes.doctree b/.doctrees/api-processes.doctree new file mode 100644 index 000000000..52eafacb8 Binary files /dev/null and b/.doctrees/api-processes.doctree differ diff --git a/.doctrees/api.doctree b/.doctrees/api.doctree new file mode 100644 index 000000000..97259fb6b Binary files /dev/null and b/.doctrees/api.doctree differ diff --git a/.doctrees/auth.doctree b/.doctrees/auth.doctree new file mode 100644 index 000000000..55a4ee682 Binary files /dev/null and b/.doctrees/auth.doctree differ diff --git a/.doctrees/basics.doctree b/.doctrees/basics.doctree new file mode 100644 index 000000000..14c8ec411 Binary files /dev/null and b/.doctrees/basics.doctree differ diff --git a/.doctrees/batch_jobs.doctree b/.doctrees/batch_jobs.doctree new file mode 100644 index 000000000..65f7e7b9c Binary files /dev/null and b/.doctrees/batch_jobs.doctree differ diff --git a/.doctrees/best_practices.doctree b/.doctrees/best_practices.doctree new file mode 100644 index 000000000..ad9dab3fd Binary files /dev/null and b/.doctrees/best_practices.doctree differ diff --git a/.doctrees/changelog.doctree b/.doctrees/changelog.doctree new file mode 100644 index 000000000..a64379ee4 Binary files /dev/null and b/.doctrees/changelog.doctree differ diff --git a/.doctrees/configuration.doctree b/.doctrees/configuration.doctree new file mode 100644 index 000000000..fc7c74f81 Binary files /dev/null and 
b/.doctrees/configuration.doctree differ diff --git a/.doctrees/cookbook/ard.doctree b/.doctrees/cookbook/ard.doctree new file mode 100644 index 000000000..aa200531c Binary files /dev/null and b/.doctrees/cookbook/ard.doctree differ diff --git a/.doctrees/cookbook/index.doctree b/.doctrees/cookbook/index.doctree new file mode 100644 index 000000000..0812ea1e9 Binary files /dev/null and b/.doctrees/cookbook/index.doctree differ diff --git a/.doctrees/cookbook/job_manager.doctree b/.doctrees/cookbook/job_manager.doctree new file mode 100644 index 000000000..4b6eef397 Binary files /dev/null and b/.doctrees/cookbook/job_manager.doctree differ diff --git a/.doctrees/cookbook/localprocessing.doctree b/.doctrees/cookbook/localprocessing.doctree new file mode 100644 index 000000000..79eca6683 Binary files /dev/null and b/.doctrees/cookbook/localprocessing.doctree differ diff --git a/.doctrees/cookbook/sampling.doctree b/.doctrees/cookbook/sampling.doctree new file mode 100644 index 000000000..2a372287f Binary files /dev/null and b/.doctrees/cookbook/sampling.doctree differ diff --git a/.doctrees/cookbook/spectral_indices.doctree b/.doctrees/cookbook/spectral_indices.doctree new file mode 100644 index 000000000..db7200686 Binary files /dev/null and b/.doctrees/cookbook/spectral_indices.doctree differ diff --git a/.doctrees/cookbook/tricks.doctree b/.doctrees/cookbook/tricks.doctree new file mode 100644 index 000000000..534712e7d Binary files /dev/null and b/.doctrees/cookbook/tricks.doctree differ diff --git a/.doctrees/cookbook/udp_sharing.doctree b/.doctrees/cookbook/udp_sharing.doctree new file mode 100644 index 000000000..afb09cb95 Binary files /dev/null and b/.doctrees/cookbook/udp_sharing.doctree differ diff --git a/.doctrees/data_access.doctree b/.doctrees/data_access.doctree new file mode 100644 index 000000000..cfe752bc6 Binary files /dev/null and b/.doctrees/data_access.doctree differ diff --git a/.doctrees/datacube_construction.doctree 
b/.doctrees/datacube_construction.doctree new file mode 100644 index 000000000..ce53dc5cb Binary files /dev/null and b/.doctrees/datacube_construction.doctree differ diff --git a/.doctrees/development.doctree b/.doctrees/development.doctree new file mode 100644 index 000000000..6dcf377fc Binary files /dev/null and b/.doctrees/development.doctree differ diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle new file mode 100644 index 000000000..86333e8f9 Binary files /dev/null and b/.doctrees/environment.pickle differ diff --git a/.doctrees/index.doctree b/.doctrees/index.doctree new file mode 100644 index 000000000..b5cd1487d Binary files /dev/null and b/.doctrees/index.doctree differ diff --git a/.doctrees/installation.doctree b/.doctrees/installation.doctree new file mode 100644 index 000000000..2404b72b4 Binary files /dev/null and b/.doctrees/installation.doctree differ diff --git a/.doctrees/machine_learning.doctree b/.doctrees/machine_learning.doctree new file mode 100644 index 000000000..ce578f815 Binary files /dev/null and b/.doctrees/machine_learning.doctree differ diff --git a/.doctrees/process_mapping.doctree b/.doctrees/process_mapping.doctree new file mode 100644 index 000000000..e89a81c09 Binary files /dev/null and b/.doctrees/process_mapping.doctree differ diff --git a/.doctrees/processes.doctree b/.doctrees/processes.doctree new file mode 100644 index 000000000..61609c618 Binary files /dev/null and b/.doctrees/processes.doctree differ diff --git a/.doctrees/udf.doctree b/.doctrees/udf.doctree new file mode 100644 index 000000000..2e15d2d7d Binary files /dev/null and b/.doctrees/udf.doctree differ diff --git a/.doctrees/udp.doctree b/.doctrees/udp.doctree new file mode 100644 index 000000000..90808b0ca Binary files /dev/null and b/.doctrees/udp.doctree differ diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 000000000..e69de29bb diff --git a/_images/apply-rescaled-histogram.png b/_images/apply-rescaled-histogram.png new 
file mode 100644 index 000000000..07d97647d Binary files /dev/null and b/_images/apply-rescaled-histogram.png differ diff --git a/_images/batchjobs-jupyter-created.png b/_images/batchjobs-jupyter-created.png new file mode 100644 index 000000000..8dd25f34c Binary files /dev/null and b/_images/batchjobs-jupyter-created.png differ diff --git a/_images/batchjobs-jupyter-listing.png b/_images/batchjobs-jupyter-listing.png new file mode 100644 index 000000000..6e94d16b1 Binary files /dev/null and b/_images/batchjobs-jupyter-listing.png differ diff --git a/_images/batchjobs-jupyter-logs.png b/_images/batchjobs-jupyter-logs.png new file mode 100644 index 000000000..e9e286e3e Binary files /dev/null and b/_images/batchjobs-jupyter-logs.png differ diff --git a/_images/batchjobs-webeditor-listing.png b/_images/batchjobs-webeditor-listing.png new file mode 100644 index 000000000..4462f6d42 Binary files /dev/null and b/_images/batchjobs-webeditor-listing.png differ diff --git a/_images/evi-composite.png b/_images/evi-composite.png new file mode 100644 index 000000000..5680bf03e Binary files /dev/null and b/_images/evi-composite.png differ diff --git a/_images/evi-masked-composite.png b/_images/evi-masked-composite.png new file mode 100644 index 000000000..82e5014f6 Binary files /dev/null and b/_images/evi-masked-composite.png differ diff --git a/_images/evi-timeseries.png b/_images/evi-timeseries.png new file mode 100644 index 000000000..106d9f0f6 Binary files /dev/null and b/_images/evi-timeseries.png differ diff --git a/_images/local_ndvi.jpg b/_images/local_ndvi.jpg new file mode 100644 index 000000000..75c523dcc Binary files /dev/null and b/_images/local_ndvi.jpg differ diff --git a/_images/logging_arrayshape.png b/_images/logging_arrayshape.png new file mode 100644 index 000000000..c8b8535ef Binary files /dev/null and b/_images/logging_arrayshape.png differ diff --git a/_images/welcome.png b/_images/welcome.png new file mode 100644 index 000000000..84951d060 Binary files 
/dev/null and b/_images/welcome.png differ diff --git a/_modules/index.html b/_modules/index.html new file mode 100644 index 000000000..c25be4759 --- /dev/null +++ b/_modules/index.html @@ -0,0 +1,149 @@ + + + + + + + Overview: module code — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/api/logs.html b/_modules/openeo/api/logs.html new file mode 100644 index 000000000..ae9255bb4 --- /dev/null +++ b/_modules/openeo/api/logs.html @@ -0,0 +1,229 @@ + + + + + + + openeo.api.logs — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.api.logs

+import logging
+from typing import Optional, Union
+
+
+
class LogEntry(dict):
    """
    Dictionary-based log record, as produced for batch jobs and services.

    Supported fields:
        - ``id``: Unique ID for the log, string, REQUIRED
        - ``code``: Error code, string, optional
        - ``level``: Severity level, string (error, warning, info or debug), REQUIRED
        - ``message``: Error message, string, REQUIRED
        - ``time``: Date and time of the error event as RFC3339 date-time, string, available since API 1.1.0
        - ``path``: A "stack trace" for the process, array of dicts
        - ``links``: Related links, array of dicts
        - ``usage``: Usage metrics available as property 'usage', dict, available since API 1.1.0
          May contain the following metrics: cpu, memory, duration, network, disk, storage and other custom ones
          Each of the metrics is also a dict with the following parts: value (numeric) and unit (string)
        - ``data``: Arbitrary data the user wants to "log" for debugging purposes.
          Note that an absent ``data`` field is not the same as ``data`` being ``None``:
          ``None`` typically stands for "no-data", while a missing field means that
          the user did not provide any debugging data at all.

    :raises ValueError: on construction, when any of the required fields
        (``id``, ``level``, ``message``) is missing.
    """

    _required = {"id", "level", "message"}

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Fail early when a mandatory field is absent (sorted for a stable error message).
        absent = sorted(field for field in self._required if field not in self)
        if absent:
            raise ValueError(f"Missing required fields: {absent}")

    @property
    def id(self):
        """Value of the (required) ``id`` field."""
        return self["id"]

    # Legacy alias
    log_id = id

    @property
    def message(self):
        """Value of the (required) ``message`` field."""
        return self["message"]

    @property
    def level(self):
        """Value of the (required) ``level`` field."""
        return self["level"]

    # TODO: add properties for "code", "time", "path", "links" and "data" with sensible defaults?
def normalize_log_level(
    log_level: Union[int, str, None], default: int = logging.DEBUG
) -> int:
    """
    Translate an openEO API style log level (e.g. the string "error")
    to the matching integer constant of Python's standard ``logging`` module (e.g. ``logging.ERROR``).

    :param log_level: log level to normalize: a log level string in the style of
        the openEO API ("error", "warning", "info", or "debug", case-insensitive),
        an integer value (e.g. a ``logging`` constant, returned as-is), or ``None``.

    :param default: fallback log level to return on unknown log level strings or ``None`` input.

    :raises TypeError: when log_level is neither a str, an int nor None.
    :return: for string input, one of ``logging.ERROR``, ``logging.WARNING``,
        ``logging.INFO``, ``logging.DEBUG`` (or ``default`` when the string is not recognized).
    """
    if log_level is None:
        return default

    if isinstance(log_level, str):
        # "critical" and "fatal" have no openEO counterpart: fold them into "error".
        levels_by_name = {
            "CRITICAL": logging.ERROR,
            "ERROR": logging.ERROR,
            "FATAL": logging.ERROR,
            "WARNING": logging.WARNING,
            "WARN": logging.WARNING,
            "INFO": logging.INFO,
            "DEBUG": logging.DEBUG,
        }
        return levels_by_name.get(log_level.upper(), default)

    if isinstance(log_level, int):
        return log_level

    raise TypeError(
        f"Value for log_level is not an int or str: type={type(log_level)}, value={log_level!r}"
    )
def log_level_name(log_level: Union[int, str, None]) -> str:
    """
    Return the lowercase name of a log level, after normalizing it
    with :py:func:`normalize_log_level`.
    The resulting name follows the log level naming used in the openEO API.
    """
    numeric_level = normalize_log_level(log_level)
    return logging.getLevelName(numeric_level).lower()
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/api/process.html b/_modules/openeo/api/process.html new file mode 100644 index 000000000..07926b18f --- /dev/null +++ b/_modules/openeo/api/process.html @@ -0,0 +1,651 @@ + + + + + + + openeo.api.process — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.api.process

+from __future__ import annotations
+
+import textwrap
+import warnings
+from typing import List, Optional, Union
+
+
+
class Parameter:
    """
    Declaration of a (process) parameter, the building block of parameterized
    :ref:`user-defined processes<user-defined-processes>`.

    A parameter is :ref:`defined <udp-declaring-parameters>`
    by at least a name and the schema of the values it stands for
    (e.g. is it a placeholder for a string, a bounding box, a date, ...).
    It can then be :ref:`used <build_and_store_udp>`
    with various functions and classes,
    like :py:class:`~openeo.rest.datacube.DataCube`,
    to build parameterized user-defined processes.

    Next to the generic :py:class:`Parameter` constructor,
    a range of helper class methods is available
    to conveniently create parameters of common types.

    :param name: parameter name, which will be used to assign concrete values to.
        It is recommended to stick to the convention of snake case naming (using lowercase with underscores).
    :param description: human-readable description of the parameter.
        When omitted, a warning is issued and the name is used as description.
    :param schema: JSON schema describing the expected data type and structure of the parameter.
        A plain string (e.g. ``"number"``) is expanded to ``{"type": "number"}``.
    :param default: default value for the parameter when it's optional.
    :param optional: toggle to indicate whether the parameter is optional or required.
    """

    # TODO unify with openeo.internal.processes.parse.Parameter?
    __slots__ = ("name", "description", "schema", "default", "optional")

    _DEFAULT_UNDEFINED = object()

    def __init__(
        self,
        name: str,
        description: Optional[str] = None,
        schema: Union[list, dict, str, None] = None,
        default=_DEFAULT_UNDEFINED,
        optional: Optional[bool] = None,
    ):
        self.name = name

        if description is None:
            # Description is required in openEO API, we are a bit more permissive here.
            warnings.warn("Parameter without description: using name as description.")
            description = name
        self.description = description

        # Normalize the schema: type name shorthand -> dict, "nothing given" -> empty dict.
        if isinstance(schema, str):
            schema = {"type": schema}
        elif not schema:
            schema = {}
        self.schema = schema

        # TODO: automatically set `optional` when `default` is set?
        self.default = default
        self.optional = optional

    def to_dict(self) -> dict:
        """
        Build the dictionary representation of this parameter, ready for JSON-serialization.

        The "optional" field is only included when it was set explicitly,
        or when a default value is defined (in which case it is forced to ``True``).
        """
        result = {"name": self.name, "description": self.description, "schema": self.schema}
        if self.optional is not None:
            result["optional"] = self.optional
        has_default = self.default is not self._DEFAULT_UNDEFINED
        if has_default:
            # A parameter with a default value is optional by definition.
            result.update(default=self.default, optional=True)
        return result

    @classmethod
    def raster_cube(cls, name: str = "data", description: str = "A data cube.", **kwargs) -> Parameter:
        """
        Shortcut to create a 'raster-cube' parameter.

        :param name: parameter name, which will be used to assign concrete values to.
            It is recommended to stick to the convention of snake case naming (using lowercase with underscores).
        :param description: human-readable description of the parameter.

        See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``).
        """
        return cls(
            name=name,
            description=description,
            schema={"type": "object", "subtype": "raster-cube"},
            **kwargs,
        )

    @classmethod
    def datacube(cls, name: str = "data", description: str = "A data cube.", **kwargs) -> Parameter:
        """
        Shortcut to create a 'datacube' parameter.

        :param name: parameter name, which will be used to assign concrete values to.
            It is recommended to stick to the convention of snake case naming (using lowercase with underscores).
        :param description: human-readable description of the parameter.

        See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``).

        .. versionadded:: 0.22.0
        """
        return cls(
            name=name,
            description=description,
            schema={"type": "object", "subtype": "datacube"},
            **kwargs,
        )

    @classmethod
    def string(
        cls,
        name: str,
        description: Optional[str] = None,
        *,
        values: Optional[List[str]] = None,
        subtype: Optional[str] = None,
        format: Optional[str] = None,
        **kwargs,
    ) -> Parameter:
        """
        Shortcut to create a 'string' parameter.

        :param name: parameter name, which will be used to assign concrete values to.
            It is recommended to stick to the convention of snake case naming (using lowercase with underscores).
        :param description: human-readable description of the parameter.
        :param values: Optional list of allowed string values to make this an "enum".
        :param subtype: Optional subtype of the 'string' schema.
        :param format: Optional format of the 'string' schema.

        See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``).
        """
        string_schema = {"type": "string"}
        # An empty `values` list is still a (very strict) enum, hence the explicit `None` check.
        if values is not None:
            string_schema.update(enum=values)
        if subtype:
            string_schema.update(subtype=subtype)
        if format:
            string_schema.update(format=format)
        return cls(name=name, description=description, schema=string_schema, **kwargs)

    @classmethod
    def integer(cls, name: str, description: Optional[str] = None, **kwargs) -> Parameter:
        """
        Shortcut to create an 'integer' parameter.

        :param name: parameter name, which will be used to assign concrete values to.
            It is recommended to stick to the convention of snake case naming (using lowercase with underscores).
        :param description: human-readable description of the parameter.

        See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``).
        """
        integer_schema = {"type": "integer"}
        return cls(name=name, description=description, schema=integer_schema, **kwargs)

    @classmethod
    def number(cls, name: str, description: Optional[str] = None, **kwargs) -> Parameter:
        """
        Shortcut to create a 'number' parameter.

        :param name: parameter name, which will be used to assign concrete values to.
            It is recommended to stick to the convention of snake case naming (using lowercase with underscores).
        :param description: human-readable description of the parameter.

        See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``).
        """
        number_schema = {"type": "number"}
        return cls(name=name, description=description, schema=number_schema, **kwargs)

    @classmethod
    def boolean(cls, name: str, description: Optional[str] = None, **kwargs) -> Parameter:
        """
        Shortcut to create a 'boolean' parameter.

        :param name: parameter name, which will be used to assign concrete values to.
            It is recommended to stick to the convention of snake case naming (using lowercase with underscores).
        :param description: human-readable description of the parameter.

        See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``).
        """
        boolean_schema = {"type": "boolean"}
        return cls(name=name, description=description, schema=boolean_schema, **kwargs)

    @classmethod
    def array(
        cls,
        name: str,
        description: Optional[str] = None,
        *,
        item_schema: Optional[Union[str, dict]] = None,
        **kwargs,
    ) -> Parameter:
        """
        Shortcut to create a parameter with an 'array' schema.

        :param name: parameter name, which will be used to assign concrete values to.
            It is recommended to stick to the convention of snake case naming (using lowercase with underscores).
        :param description: human-readable description of the parameter.
        :param item_schema: Schema of the array items given in JSON Schema style, e.g. ``{"type": "string"}``.
            Simple schemas can also be specified as single string:
            e.g. ``"string"`` will be expanded to ``{"type": "string"}``.

        See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``).

        .. versionchanged:: 0.23.0
            Added ``item_schema`` argument.
        """
        array_schema = {"type": "array"}
        if item_schema:
            # Expand the type name shorthand to a full JSON schema.
            array_schema["items"] = {"type": item_schema} if isinstance(item_schema, str) else item_schema
        return cls(name=name, description=description, schema=array_schema, **kwargs)

    @classmethod
    def object(
        cls, name: str, description: Optional[str] = None, *, subtype: Optional[str] = None, **kwargs
    ) -> Parameter:
        """
        Shortcut to create an 'object' type parameter.

        :param name: parameter name, which will be used to assign concrete values to.
            It is recommended to stick to the convention of snake case naming (using lowercase with underscores).
        :param description: human-readable description of the parameter.
        :param subtype: subtype of the 'object' schema

        See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``).

        .. versionadded:: 0.26.0
        """
        object_schema = {"type": "object"}
        if subtype:
            object_schema.update(subtype=subtype)
        return cls(name=name, description=description, schema=object_schema, **kwargs)

    @classmethod
    def bounding_box(
        cls,
        name: str,
        description: str = "Spatial extent specified as a bounding box with 'west', 'south', 'east' and 'north' fields.",
        **kwargs,
    ) -> Parameter:
        """
        Shortcut to create a 'bounding box' parameter, which allows to specify a spatial extent
        with "west", "south", "east" and "north" bounds (and optionally a CRS identifier).

        :param name: parameter name, which will be used to assign concrete values to.
            It is recommended to stick to the convention of snake case naming (using lowercase with underscores).
        :param description: human-readable description of the parameter.

        See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``).

        .. versionadded:: 0.30.0
        """
        crs_schema = {
            "description": "Coordinate reference system of the extent, specified as as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system.",
            "anyOf": [
                {
                    "type": "integer",
                    "subtype": "epsg-code",
                    "title": "EPSG Code",
                    "minimum": 1000,
                },
                {
                    "type": "string",
                    "subtype": "wkt2-definition",
                    "title": "WKT2 definition",
                },
            ],
            "default": 4326,
        }
        properties = {
            "west": {
                "type": "number",
                "description": "West (lower left corner, coordinate axis 1).",
            },
            "south": {
                "type": "number",
                "description": "South (lower left corner, coordinate axis 2).",
            },
            "east": {
                "type": "number",
                "description": "East (upper right corner, coordinate axis 1).",
            },
            "north": {
                "type": "number",
                "description": "North (upper right corner, coordinate axis 2).",
            },
            "crs": crs_schema,
            # TODO: support base and height?
        }
        bbox_schema = {
            "type": "object",
            "subtype": "bounding-box",
            "required": ["west", "south", "east", "north"],
            "properties": properties,
        }
        return cls(name=name, description=description, schema=bbox_schema, **kwargs)

    @classmethod
    def spatial_extent(
        cls,
        name: str = "spatial_extent",
        description: Optional[str] = None,
        **kwargs,
    ) -> Parameter:
        """
        Shortcut to create a 'spatial_extent' parameter, which is compatible with the ``load_collection`` argument of
        the same name. This allows to conveniently create user-defined processes that can be applied to a bounding box
        and vector data for spatial filtering. Users can also set it to null,
        and define the spatial filtering using other processes.

        :param name: parameter name, which will be used to assign concrete values to.
            It is recommended to stick to the convention of snake case naming (using lowercase with underscores).
        :param description: human-readable description of the parameter.
            When omitted, a generic description of the spatial filtering behavior is used.

        See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``).

        .. versionadded:: 0.32.0
        """
        if description is None:
            default_description = textwrap.dedent(
                """
                Limits the data to process to the specified bounding box or polygons.

                For raster data, the process loads the pixel into the data cube if the point
                at the pixel center intersects with the bounding box or any of the polygons
                (as defined in the Simple Features standard by the OGC).

                For vector data, the process loads the geometry into the data cube if the geometry
                is fully within the bounding box or any of the polygons (as defined in the
                Simple Features standard by the OGC). Empty geometries may only be in the
                data cube if no spatial extent has been provided.

                Empty geometries are ignored.

                Set this parameter to null to set no limit for the spatial extent.
                """
            )
            description = default_description.strip()

        # Option 1: an explicit bounding box (optionally with base/height and CRS).
        bounding_box_schema = {
            "title": "Bounding Box",
            "type": "object",
            "subtype": "bounding-box",
            "required": ["west", "south", "east", "north"],
            "properties": {
                "west": {"description": "West (lower left corner, coordinate axis 1).", "type": "number"},
                "south": {"description": "South (lower left corner, coordinate axis 2).", "type": "number"},
                "east": {"description": "East (upper right corner, coordinate axis 1).", "type": "number"},
                "north": {"description": "North (upper right corner, coordinate axis 2).", "type": "number"},
                "base": {
                    "description": "Base (optional, lower left corner, coordinate axis 3).",
                    "type": ["number", "null"],
                    "default": None,
                },
                "height": {
                    "description": "Height (optional, upper right corner, coordinate axis 3).",
                    "type": ["number", "null"],
                    "default": None,
                },
                "crs": {
                    "description": "Coordinate reference system of the extent, specified as as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system.",
                    "anyOf": [
                        {
                            "title": "EPSG Code",
                            "type": "integer",
                            "subtype": "epsg-code",
                            "minimum": 1000,
                            "examples": [3857],
                        },
                        {"title": "WKT2", "type": "string", "subtype": "wkt2-definition"},
                    ],
                    "default": 4326,
                },
            },
        }
        # Option 2: geometries provided as vector data cube.
        vector_cube_schema = {
            "title": "Vector data cube",
            "description": "Limits the data cube to the bounding box of the given geometries in the vector data cube. For raster data, all pixels inside the bounding box that do not intersect with any of the polygons will be set to no data (`null`). Empty geometries are ignored.",
            "type": "object",
            "subtype": "datacube",
            "dimensions": [{"type": "geometry"}],
        }
        # Option 3: null, to disable spatial filtering.
        no_filter_schema = {
            "title": "No filter",
            "description": "Don't filter spatially. All data is included in the data cube.",
            "type": "null",
        }
        return cls(
            name=name,
            description=description,
            schema=[bounding_box_schema, vector_cube_schema, no_filter_schema],
            **kwargs,
        )

    @classmethod
    def date(cls, name: str, description: str = "A date.", **kwargs) -> Parameter:
        """
        Shortcut to create a 'date' parameter.

        :param name: parameter name, which will be used to assign concrete values to.
            It is recommended to stick to the convention of snake case naming (using lowercase with underscores).
        :param description: human-readable description of the parameter.

        See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``).

        .. versionadded:: 0.30.0
        """
        return cls(
            name=name,
            description=description,
            schema={"type": "string", "subtype": "date", "format": "date"},
            **kwargs,
        )

    @classmethod
    def date_time(cls, name: str, description: str = "A date with time.", **kwargs) -> Parameter:
        """
        Shortcut to create a 'date-time' parameter.

        :param name: parameter name, which will be used to assign concrete values to.
            It is recommended to stick to the convention of snake case naming (using lowercase with underscores).
        :param description: human-readable description of the parameter.

        See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``).

        .. versionadded:: 0.30.0
        """
        return cls(
            name=name,
            description=description,
            schema={"type": "string", "subtype": "date-time", "format": "date-time"},
            **kwargs,
        )

    @classmethod
    def geojson(cls, name: str, description: str = "Geometries specified as GeoJSON object.", **kwargs) -> Parameter:
        """
        Shortcut to create a 'geojson' parameter, which allows to specify geometries as an inline GeoJSON object.

        :param name: parameter name, which will be used to assign concrete values to.
            It is recommended to stick to the convention of snake case naming (using lowercase with underscores).
        :param description: human-readable description of the parameter.

        See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``).

        .. versionadded:: 0.30.0
        """
        return cls(
            name=name,
            description=description,
            schema={"type": "object", "subtype": "geojson"},
            **kwargs,
        )

    @classmethod
    def temporal_interval(
        cls,
        name: str = "temporal_extent",
        description: str = "Temporal extent specified as two-element array with start and end date/date-time.",
        **kwargs,
    ) -> Parameter:
        """
        Shortcut to create a 'temporal-interval' parameter, which allows to specify a temporal extent
        as a two-element array with start and end date/date-time.

        :param name: parameter name, which will be used to assign concrete values to.
            It is recommended to stick to the convention of snake case naming (using lowercase with underscores).
        :param description: human-readable description of the parameter.

        See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``).

        .. versionadded:: 0.30.0
        """
        # Each boundary of the interval is a date-time, a date, or null.
        boundary_schema = {
            "anyOf": [
                {"type": "string", "subtype": "date-time", "format": "date-time"},
                {"type": "string", "subtype": "date", "format": "date"},
                {"type": "null"},
            ]
        }
        interval_schema = {
            "type": "array",
            "subtype": "temporal-interval",
            "uniqueItems": True,
            "minItems": 2,
            "maxItems": 2,
            "items": boundary_schema,
        }
        return cls(name=name, description=description, schema=interval_schema, **kwargs)
def schema_supports(schema: Union[dict, List[dict]], type: str, subtype: Optional[str] = None) -> bool:
    """
    Check whether a parameter schema supports the given type (and optionally subtype).

    :param schema: a single JSON schema (dict), or a list of alternative schemas
        of which at least one has to match.
    :param type: type name to look for; the "type" field of the schema
        can be a single type name or a list of type names.
    :param subtype: optional subtype the schema must declare as well.

    :raises ValueError: when the schema is neither a dict nor a list,
        or when its "type" field is neither a string nor a list.
    """
    # TODO: support checking item type in arrays
    if isinstance(schema, list):
        # List of alternatives: one supporting schema is enough.
        return any(schema_supports(candidate, type=type, subtype=subtype) for candidate in schema)
    if not isinstance(schema, dict):
        raise ValueError(schema)

    declared_type = schema.get("type")
    if isinstance(declared_type, str):
        type_matches = declared_type == type
    elif isinstance(declared_type, list):
        type_matches = type in declared_type
    else:
        raise ValueError(declared_type)

    if not type_matches:
        return False
    # The subtype is only compared when one is requested.
    return not subtype or schema.get("subtype") == subtype
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/extra/job_management.html b/_modules/openeo/extra/job_management.html new file mode 100644 index 000000000..55c1a725f --- /dev/null +++ b/_modules/openeo/extra/job_management.html @@ -0,0 +1,1356 @@ + + + + + + + openeo.extra.job_management — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.extra.job_management

+import abc
+import collections
+import contextlib
+import dataclasses
+import datetime
+import json
+import logging
+import re
+import time
+import warnings
+from pathlib import Path
+from threading import Thread
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    List,
+    Mapping,
+    NamedTuple,
+    Optional,
+    Union,
+)
+
+import numpy
+import pandas as pd
+import requests
+import shapely.errors
+import shapely.geometry.base
+import shapely.wkt
+from requests.adapters import HTTPAdapter, Retry
+
+from openeo import BatchJob, Connection
+from openeo.internal.processes.parse import (
+    Parameter,
+    Process,
+    parse_remote_process_definition,
+)
+from openeo.rest import OpenEoApiError
+from openeo.util import LazyLoadCache, deep_get, repr_truncate, rfc3339
+
+_log = logging.getLogger(__name__)
+
+
class _Backend(NamedTuple):
    """
    Container for backend info/settings.

    Fields:

    - ``get_connection``: callable (without arguments) to create a backend connection.
    - ``parallel_jobs``: maximum number of jobs to allow in parallel on a backend.
    """

    get_connection: Callable[[], Connection]
    parallel_jobs: int
+
+
+MAX_RETRIES = 5
+
+# Sentinel value to indicate that a parameter was not set
+_UNSET = object()
+
+
+
class JobDatabaseInterface(metaclass=abc.ABCMeta):
    """
    Abstract interface of a job metadata database,
    to be used with the :py:class:`MultiBackendJobManager`.

    It allows to regularly persist the job metadata while polling the job statuses
    and to resume/restart the job tracking after it was interrupted.

    .. versionadded:: 0.31.0
    """

    @abc.abstractmethod
    def exists(self) -> bool:
        """Does the job database already exist, to read job data from?"""

    @abc.abstractmethod
    def persist(self, df: pd.DataFrame):
        """
        Store job data to the database.
        The provided dataframe may contain partial information, which is merged into the larger database.

        :param df: job data to store.
        """

    @abc.abstractmethod
    def count_by_status(self, statuses: Iterable[str] = ()) -> dict:
        """
        Retrieve the number of jobs per status.

        :param statuses: List/set of statuses to include. If empty, all statuses are included.

        :return: dictionary with status as key and the count as value.
        """

    @abc.abstractmethod
    def get_by_status(self, statuses: List[str], max=None) -> pd.DataFrame:
        """
        Returns a dataframe with jobs, filtered by status.

        :param statuses: List of statuses to include.
        :param max: Maximum number of jobs to return.

        :return: DataFrame with jobs filtered by status.
        """
+
def _start_job_default(row: pd.Series, connection: Connection, *args, **kwargs):
    """Placeholder ``start_job`` callable: always fails, to signal that no real callable was provided."""
    raise NotImplementedError("No 'start_job' callable provided")


@dataclasses.dataclass(frozen=True)
class _ColumnProperties:
    """Expected/required properties of a column in the job manager related dataframes"""

    # Name of the dtype expected for the column.
    dtype: str = "object"
    # Value to use for the column when it has to be added to a dataframe.
    default: Any = None
class MultiBackendJobManager:
    """
    Tracker for multiple jobs on multiple backends.

    Usage example:

    .. code-block:: python

        import logging
        import pandas as pd
        import openeo
        from openeo.extra.job_management import MultiBackendJobManager

        logging.basicConfig(
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            level=logging.INFO
        )

        manager = MultiBackendJobManager()
        manager.add_backend("foo", connection=openeo.connect("http://foo.test"))
        manager.add_backend("bar", connection=openeo.connect("http://bar.test"))

        jobs_df = pd.DataFrame(...)
        job_db = "jobs.csv"

        def start_job(
            row: pd.Series,
            connection: openeo.Connection,
            **kwargs
        ) -> openeo.BatchJob:
            year = row["year"]
            cube = connection.load_collection(
                ...,
                temporal_extent=[f"{year}-01-01", f"{year+1}-01-01"],
            )
            ...
            return cube.create_job(...)

        manager.run_jobs(df=jobs_df, start_job=start_job, job_db=job_db)

    See :py:meth:`.run_jobs` for more information on the ``start_job`` callable.

    :param poll_sleep:
        How many seconds to sleep between polls.

    :param root_dir:
        Root directory to save files for the jobs, e.g. metadata and error logs.
        This defaults to "." the current directory.

        Each job gets its own subfolder in this root directory.
        You can use the following methods to find the relevant paths,
        based on the job ID:

        - get_job_dir
        - get_error_log_path
        - get_job_metadata_path

    :param cancel_running_job_after:
        Optional temporal limit (in seconds) after which running jobs should be canceled
        by the job manager.

    .. versionadded:: 0.14.0

    .. versionchanged:: 0.32.0
        Added ``cancel_running_job_after`` parameter.
    """

    # Expected columns in the job DB dataframes.
    # TODO: make this part of public API when settled?
    _COLUMN_REQUIREMENTS: Mapping[str, _ColumnProperties] = {
        "id": _ColumnProperties(dtype="str"),
        "backend_name": _ColumnProperties(dtype="str"),
        "status": _ColumnProperties(dtype="str", default="not_started"),
        # TODO: use proper date/time dtype instead of legacy str for start times?
        "start_time": _ColumnProperties(dtype="str"),
        "running_start_time": _ColumnProperties(dtype="str"),
        # TODO: these columns "cpu", "memory", "duration" are not referenced explicitly from MultiBackendJobManager,
        #       but are indirectly coupled through handling of VITO-specific "usage" metadata in `_track_statuses`.
        #       Since bfd99e34 they are not really required to be present anymore, can we make that more explicit?
        "cpu": _ColumnProperties(dtype="str"),
        "memory": _ColumnProperties(dtype="str"),
        "duration": _ColumnProperties(dtype="str"),
        "costs": _ColumnProperties(dtype="float64"),
    }

    def __init__(
        self,
        poll_sleep: int = 60,
        root_dir: Optional[Union[str, Path]] = ".",
        *,
        cancel_running_job_after: Optional[int] = None,
    ):
        """Create a MultiBackendJobManager."""
        # Flag to request the background polling thread to stop.
        self._stop_thread = None
        # Registered backends, by name.
        self.backends: Dict[str, _Backend] = {}
        self.poll_sleep = poll_sleep
        # Cache of already created connections, by backend name.
        self._connections: Dict[str, Connection] = {}

        # An explicit None or "" should also default to "."
        self._root_dir = Path(root_dir or ".")

        # Stored as timedelta (or None when no limit was given).
        self._cancel_running_job_after = (
            datetime.timedelta(seconds=cancel_running_job_after) if cancel_running_job_after is not None else None
        )
        # Background polling thread, if any.
        self._thread = None
+[docs] + def add_backend( + self, + name: str, + connection: Union[Connection, Callable[[], Connection]], + parallel_jobs: int = 2, + ): + """ + Register a backend with a name and a Connection getter. + + :param name: + Name of the backend. + :param connection: + Either a Connection to the backend, or a callable to create a backend connection. + :param parallel_jobs: + Maximum number of jobs to allow in parallel on a backend. + """ + + # TODO: Code might become simpler if we turn _Backend into class move this logic there. + # We would need to keep add_backend here as part of the public API though. + # But the amount of unrelated "stuff to manage" would be less (better cohesion) + if isinstance(connection, Connection): + c = connection + connection = lambda: c + assert callable(connection) + self.backends[name] = _Backend(get_connection=connection, parallel_jobs=parallel_jobs)
+ + + def _get_connection(self, backend_name: str, resilient: bool = True) -> Connection: + """Get a connection for the backend and optionally make it resilient (adds retry behavior) + + The default is to get a resilient connection, but if necessary you can turn it off with + resilient=False + """ + + # TODO: Code could be simplified if _Backend is a class and this method is moved there. + # TODO: Is it better to make this a public method? + + # Reuse the connection if we can, in order to avoid modifying the same connection several times. + # This is to avoid adding the retry HTTPAdapter multiple times. + # Remember that the get_connection attribute on _Backend can be a Connection object instead + # of a callable, so we don't want to assume it is a fresh connection that doesn't have the + # retry adapter yet. + if backend_name in self._connections: + return self._connections[backend_name] + + connection = self.backends[backend_name].get_connection() + # If we really need it we can skip making it resilient, but by default it should be resilient. + if resilient: + self._make_resilient(connection) + + self._connections[backend_name] = connection + return connection + + @staticmethod + def _make_resilient(connection): + """Add an HTTPAdapter that retries the request if it fails. 
+ + Retry for the following HTTP 50x statuses: + 502 Bad Gateway + 503 Service Unavailable + 504 Gateway Timeout + """ + # TODO: refactor this helper out of this class and unify with `openeo_driver.util.http.requests_with_retry` + status_forcelist = [500, 502, 503, 504] + retries = Retry( + total=MAX_RETRIES, + read=MAX_RETRIES, + other=MAX_RETRIES, + status=MAX_RETRIES, + backoff_factor=0.1, + status_forcelist=status_forcelist, + allowed_methods=["HEAD", "GET", "OPTIONS", "POST"], + ) + connection.session.mount("https://", HTTPAdapter(max_retries=retries)) + connection.session.mount("http://", HTTPAdapter(max_retries=retries)) + + @classmethod + def _normalize_df(cls, df: pd.DataFrame) -> pd.DataFrame: + """ + Normalize given pandas dataframe (creating a new one): + ensure we have the required columns. + + :param df: The dataframe to normalize. + :return: a new dataframe that is normalized. + """ + new_columns = {col: req.default for (col, req) in cls._COLUMN_REQUIREMENTS.items() if col not in df.columns} + df = df.assign(**new_columns) + + return df + +
+[docs] + def start_job_thread(self, start_job: Callable[[], BatchJob], job_db: JobDatabaseInterface): + """ + Start running the jobs in a separate thread, returns afterwards. + + :param start_job: + A callback which will be invoked with, amongst others, + the row of the dataframe for which a job should be created and/or started. + This callable should return a :py:class:`openeo.rest.job.BatchJob` object. + + The following parameters will be passed to ``start_job``: + + ``row`` (:py:class:`pandas.Series`): + The row in the pandas dataframe that stores the jobs state and other tracked data. + + ``connection_provider``: + A getter to get a connection by backend name. + Typically, you would need either the parameter ``connection_provider``, + or the parameter ``connection``, but likely you will not need both. + + ``connection`` (:py:class:`Connection`): + The :py:class:`Connection` itself, that has already been created. + Typically, you would need either the parameter ``connection_provider``, + or the parameter ``connection``, but likely you will not need both. + + ``provider`` (``str``): + The name of the backend that will run the job. + + You do not have to define all the parameters described below, but if you leave + any of them out, then remember to include the ``*args`` and ``**kwargs`` parameters. + Otherwise you will have an exception because :py:meth:`run_jobs` passes unknown parameters to ``start_job``. + :param job_db: + Job database to load/store existing job status data and other metadata from/to. + Can be specified as a path to CSV or Parquet file, + or as a custom database object following the :py:class:`JobDatabaseInterface` interface. + + .. note:: + Support for Parquet files depends on the ``pyarrow`` package + as :ref:`optional dependency <installation-optional-dependencies>`. + + .. 
versionadded:: 0.32.0 + """ + + # Resume from existing db + _log.info(f"Resuming `run_jobs` from existing {job_db}") + + self._stop_thread = False + + def run_loop(): + + # TODO: support user-provided `stats` + stats = collections.defaultdict(int) + + while ( + sum(job_db.count_by_status(statuses=["not_started", "created", "queued", "running"]).values()) > 0 + and not self._stop_thread + ): + self._job_update_loop(job_db=job_db, start_job=start_job) + stats["run_jobs loop"] += 1 + + _log.info(f"Job status histogram: {job_db.count_by_status()}. Run stats: {dict(stats)}") + # Do sequence of micro-sleeps to allow for quick thread exit + for _ in range(int(max(1, self.poll_sleep))): + time.sleep(1) + if self._stop_thread: + break + + self._thread = Thread(target=run_loop) + self._thread.start()
+ + +
+[docs] + def stop_job_thread(self, timeout_seconds: Optional[float] = _UNSET): + """ + Stop the job polling thread. + + :param timeout_seconds: The time to wait for the thread to stop. + By default, it will wait for 2 times the poll_sleep time. + Set to None to wait indefinitely. + + .. versionadded:: 0.32.0 + """ + if self._thread is not None: + self._stop_thread = True + if timeout_seconds is _UNSET: + timeout_seconds = 2 * self.poll_sleep + self._thread.join(timeout_seconds) + if self._thread.is_alive(): + _log.warning("Job thread did not stop after timeout") + else: + _log.error("No job thread to stop")
+ + +
+[docs] + def run_jobs( + self, + df: Optional[pd.DataFrame] = None, + start_job: Callable[[], BatchJob] = _start_job_default, + job_db: Union[str, Path, JobDatabaseInterface, None] = None, + **kwargs, + ) -> dict: + """Runs jobs, specified in a dataframe, and tracks parameters. + + :param df: + DataFrame that specifies the jobs, and tracks the jobs' statuses. If None, the job_db has to be specified and will be used. + + :param start_job: + A callback which will be invoked with, amongst others, + the row of the dataframe for which a job should be created and/or started. + This callable should return a :py:class:`openeo.rest.job.BatchJob` object. + + The following parameters will be passed to ``start_job``: + + ``row`` (:py:class:`pandas.Series`): + The row in the pandas dataframe that stores the jobs state and other tracked data. + + ``connection_provider``: + A getter to get a connection by backend name. + Typically, you would need either the parameter ``connection_provider``, + or the parameter ``connection``, but likely you will not need both. + + ``connection`` (:py:class:`Connection`): + The :py:class:`Connection` itself, that has already been created. + Typically, you would need either the parameter ``connection_provider``, + or the parameter ``connection``, but likely you will not need both. + + ``provider`` (``str``): + The name of the backend that will run the job. + + You do not have to define all the parameters described below, but if you leave + any of them out, then remember to include the ``*args`` and ``**kwargs`` parameters. + Otherwise you will have an exception because :py:meth:`run_jobs` passes unknown parameters to ``start_job``. + + :param job_db: + Job database to load/store existing job status data and other metadata from/to. + Can be specified as a path to CSV or Parquet file, + or as a custom database object following the :py:class:`JobDatabaseInterface` interface. + + .. 
note:: + Support for Parquet files depends on the ``pyarrow`` package + as :ref:`optional dependency <installation-optional-dependencies>`. + + :return: dictionary with stats collected during the job running loop. + Note that the set of fields in this dictionary is experimental + and subject to change + + .. versionchanged:: 0.31.0 + Added support for persisting the job metadata in Parquet format. + + .. versionchanged:: 0.31.0 + Replace ``output_file`` argument with ``job_db`` argument, + which can be a path to a CSV or Parquet file, + or a user-defined :py:class:`JobDatabaseInterface` object. + The deprecated ``output_file`` argument is still supported for now. + + .. versionchanged:: 0.33.0 + return a stats dictionary + """ + # TODO Defining start_jobs as a Protocol might make its usage more clear, and avoid complicated docstrings, + + # Backwards compatibility for deprecated `output_file` argument + if "output_file" in kwargs: + if job_db is not None: + raise ValueError("Only one of `output_file` and `job_db` should be provided") + warnings.warn( + "The `output_file` argument is deprecated. Use `job_db` instead.", DeprecationWarning, stacklevel=2 + ) + job_db = kwargs.pop("output_file") + assert not kwargs, f"Unexpected keyword arguments: {kwargs!r}" + + if isinstance(job_db, (str, Path)): + job_db = get_job_db(path=job_db) + + if not isinstance(job_db, JobDatabaseInterface): + raise ValueError(f"Unsupported job_db {job_db!r}") + + if job_db.exists(): + # Resume from existing db + _log.info(f"Resuming `run_jobs` from existing {job_db}") + elif df is not None: + # TODO: start showing deprecation warnings for this usage pattern? 
+ job_db.initialize_from_df(df) + + # TODO: support user-provided `stats` + stats = collections.defaultdict(int) + + while sum(job_db.count_by_status(statuses=["not_started", "created", "queued", "running"]).values()) > 0: + self._job_update_loop(job_db=job_db, start_job=start_job, stats=stats) + stats["run_jobs loop"] += 1 + + # Show current stats and sleep + _log.info(f"Job status histogram: {job_db.count_by_status()}. Run stats: {dict(stats)}") + time.sleep(self.poll_sleep) + stats["sleep"] += 1 + + return stats
+ + + def _job_update_loop( + self, job_db: JobDatabaseInterface, start_job: Callable[[], BatchJob], stats: Optional[dict] = None + ): + """ + Inner loop logic of job management: + go through the necessary jobs to check for status updates, + trigger status events, start new jobs when there is room for them, etc. + """ + if not self.backends: + raise RuntimeError("No backends registered") + + stats = stats if stats is not None else collections.defaultdict(int) + + with ignore_connection_errors(context="get statuses"): + self._track_statuses(job_db, stats=stats) + stats["track_statuses"] += 1 + + not_started = job_db.get_by_status(statuses=["not_started"], max=200).copy() + if len(not_started) > 0: + # Check number of jobs running at each backend + running = job_db.get_by_status(statuses=["created", "queued", "running"]) + stats["job_db get_by_status"] += 1 + per_backend = running.groupby("backend_name").size().to_dict() + _log.info(f"Running per backend: {per_backend}") + total_added = 0 + for backend_name in self.backends: + backend_load = per_backend.get(backend_name, 0) + if backend_load < self.backends[backend_name].parallel_jobs: + to_add = self.backends[backend_name].parallel_jobs - backend_load + for i in not_started.index[total_added : total_added + to_add]: + self._launch_job(start_job, df=not_started, i=i, backend_name=backend_name, stats=stats) + stats["job launch"] += 1 + + job_db.persist(not_started.loc[i : i + 1]) + stats["job_db persist"] += 1 + total_added += 1 + + def _launch_job(self, start_job, df, i, backend_name, stats: Optional[dict] = None): + """Helper method for launching jobs + + :param start_job: + A callback which will be invoked with the row of the dataframe for which a job should be started. + This callable should return a :py:class:`openeo.rest.job.BatchJob` object. 
+ + See also: + `MultiBackendJobManager.run_jobs` for the parameters and return type of this callable + + Even though it is called here in `_launch_job` and that is where the constraints + really come from, the public method `run_jobs` needs to document `start_job` anyway, + so let's avoid duplication in the docstrings. + + :param df: + DataFrame that specifies the jobs, and tracks the jobs' statuses. + + :param i: + index of the job's row in dataframe df + + :param backend_name: + name of the backend that will execute the job. + """ + stats = stats if stats is not None else collections.defaultdict(int) + + df.loc[i, "backend_name"] = backend_name + row = df.loc[i] + try: + _log.info(f"Starting job on backend {backend_name} for {row.to_dict()}") + connection = self._get_connection(backend_name, resilient=True) + + stats["start_job call"] += 1 + job = start_job( + row=row, + connection_provider=self._get_connection, + connection=connection, + provider=backend_name, + ) + except requests.exceptions.ConnectionError as e: + _log.warning(f"Failed to start job for {row.to_dict()}", exc_info=True) + df.loc[i, "status"] = "start_failed" + stats["start_job error"] += 1 + else: + df.loc[i, "start_time"] = rfc3339.utcnow() + if job: + df.loc[i, "id"] = job.job_id + with ignore_connection_errors(context="get status"): + status = job.status() + stats["job get status"] += 1 + df.loc[i, "status"] = status + if status == "created": + # start job if not yet done by callback + try: + job.start() + stats["job start"] += 1 + df.loc[i, "status"] = job.status() + stats["job get status"] += 1 + except OpenEoApiError as e: + _log.error(e) + df.loc[i, "status"] = "start_failed" + stats["job start error"] += 1 + else: + # TODO: what is this "skipping" about actually? + df.loc[i, "status"] = "skipped" + stats["start_job skipped"] += 1 + +
+[docs] + def on_job_done(self, job: BatchJob, row): + """ + Handles jobs that have finished. Can be overridden to provide custom behaviour. + + Default implementation downloads the results into a folder containing the title. + + :param job: The job that has finished. + :param row: DataFrame row containing the job's metadata. + """ + # TODO: param `row` is never accessed in this method. Remove it? Is this intended for future use? + + job_metadata = job.describe() + job_dir = self.get_job_dir(job.job_id) + metadata_path = self.get_job_metadata_path(job.job_id) + + self.ensure_job_dir_exists(job.job_id) + job.get_results().download_files(target=job_dir) + + with metadata_path.open("w", encoding="utf-8") as f: + json.dump(job_metadata, f, ensure_ascii=False)
+ + +
+[docs] + def on_job_error(self, job: BatchJob, row): + """ + Handles jobs that stopped with errors. Can be overridden to provide custom behaviour. + + Default implementation writes the error logs to a JSON file. + + :param job: The job that has finished. + :param row: DataFrame row containing the job's metadata. + """ + # TODO: param `row` is never accessed in this method. Remove it? Is this intended for future use? + + error_logs = job.logs(level="error") + error_log_path = self.get_error_log_path(job.job_id) + + if len(error_logs) > 0: + self.ensure_job_dir_exists(job.job_id) + error_log_path.write_text(json.dumps(error_logs, indent=2))
+ + +
+[docs] + def on_job_cancel(self, job: BatchJob, row): + """ + Handle a job that was cancelled. Can be overridden to provide custom behaviour. + + Default implementation does not do anything. + + :param job: The job that was canceled. + :param row: DataFrame row containing the job's metadata. + """ + pass
+ + + def _cancel_prolonged_job(self, job: BatchJob, row): + """Cancel the job if it has been running for too long.""" + job_running_start_time = rfc3339.parse_datetime(row["running_start_time"], with_timezone=True) + elapsed = datetime.datetime.now(tz=datetime.timezone.utc) - job_running_start_time + if elapsed > self._cancel_running_job_after: + try: + _log.info( + f"Cancelling long-running job {job.job_id} (after {elapsed}, running since {job_running_start_time})" + ) + job.stop() + except OpenEoApiError as e: + _log.error(f"Failed to cancel long-running job {job.job_id}: {e}") + +
+[docs] + def get_job_dir(self, job_id: str) -> Path: + """Path to directory where job metadata, results and error logs are be saved.""" + return self._root_dir / f"job_{job_id}"
+ + +
+[docs] + def get_error_log_path(self, job_id: str) -> Path: + """Path where error log file for the job is saved.""" + return self.get_job_dir(job_id) / f"job_{job_id}_errors.json"
+ + +
+[docs] + def get_job_metadata_path(self, job_id: str) -> Path: + """Path where job metadata file is saved.""" + return self.get_job_dir(job_id) / f"job_{job_id}.json"
+ + +
+[docs] + def ensure_job_dir_exists(self, job_id: str) -> Path: + """Create the job folder if it does not exist yet.""" + job_dir = self.get_job_dir(job_id) + if not job_dir.exists(): + job_dir.mkdir(parents=True)
+ + + def _track_statuses(self, job_db: JobDatabaseInterface, stats: Optional[dict] = None): + """ + Tracks status (and stats) of running jobs (in place). + Optionally cancels jobs when running too long. + """ + stats = stats if stats is not None else collections.defaultdict(int) + + active = job_db.get_by_status(statuses=["created", "queued", "running"]).copy() + for i in active.index: + job_id = active.loc[i, "id"] + backend_name = active.loc[i, "backend_name"] + previous_status = active.loc[i, "status"] + + try: + con = self._get_connection(backend_name) + the_job = con.job(job_id) + job_metadata = the_job.describe() + stats["job describe"] += 1 + new_status = job_metadata["status"] + + _log.info( + f"Status of job {job_id!r} (on backend {backend_name}) is {new_status!r} (previously {previous_status!r})" + ) + + if new_status == "finished": + stats["job finished"] += 1 + self.on_job_done(the_job, active.loc[i]) + + if previous_status != "error" and new_status == "error": + stats["job failed"] += 1 + self.on_job_error(the_job, active.loc[i]) + + if previous_status in {"created", "queued"} and new_status == "running": + stats["job started running"] += 1 + active.loc[i, "running_start_time"] = rfc3339.utcnow() + + if new_status == "canceled": + stats["job canceled"] += 1 + self.on_job_cancel(the_job, active.loc[i]) + + if self._cancel_running_job_after and new_status == "running": + self._cancel_prolonged_job(the_job, active.loc[i]) + + active.loc[i, "status"] = new_status + + # TODO: there is well hidden coupling here with "cpu", "memory" and "duration" from `_normalize_df` + for key in job_metadata.get("usage", {}).keys(): + if key in active.columns: + active.loc[i, key] = _format_usage_stat(job_metadata, key) + if "costs" in job_metadata.keys(): + active.loc[i, "costs"] = job_metadata.get("costs") + + except OpenEoApiError as e: + # TODO: inspect status code and e.g. 
differentiate between 4xx/5xx + stats["job tracking error"] += 1 + _log.warning(f"Error while tracking status of job {job_id!r} on backend {backend_name}: {e!r}") + + stats["job_db persist"] += 1 + job_db.persist(active)
def _format_usage_stat(job_metadata: dict, field: str) -> str:
    """
    Format the "usage" entry ``field`` of the job metadata as "{value} {unit}" string.
    A missing value falls back to 0, a missing unit to an empty string.
    """
    amount = deep_get(job_metadata, "usage", field, "value", default=0)
    unit = deep_get(job_metadata, "usage", field, "unit", default="")
    formatted = f"{amount} {unit}"
    return formatted.strip()


@contextlib.contextmanager
def ignore_connection_errors(context: Optional[str] = None, sleep: int = 5):
    """
    Context manager to ignore connection errors:
    a ``requests`` connection error raised in the managed block is logged as warning
    and followed by a short sleep, instead of being propagated.

    :param context: optional description of what was being done, for the log message.
    :param sleep: number of seconds to sleep after a connection error.
    """
    # TODO: move this out of this module and make it a more public utility?
    try:
        yield
    except requests.exceptions.ConnectionError as e:
        _log.warning(f"Ignoring connection error (context {context or 'n/a'}): {e}")
        # Back off a bit
        time.sleep(sleep)


class FullDataFrameJobDatabase(JobDatabaseInterface):
    """
    Base class for job databases that keep the full job data in memory as a single pandas dataframe,
    which is loaded lazily with :py:meth:`read`.
    """

    def __init__(self):
        super().__init__()
        # In-memory dataframe with all job data (loaded on first access of `df`).
        self._df = None

    def initialize_from_df(self, df: pd.DataFrame, *, on_exists: str = "error"):
        """
        Initialize the job database from a given dataframe,
        which will be first normalized to be compatible
        with :py:class:`MultiBackendJobManager` usage.

        :param df: dataframe with some columns your ``start_job`` callable expects
        :param on_exists: what to do when the job database already exists (persisted on disk):
            - "error": (default) raise an exception
            - "skip": work with existing database, ignore given dataframe and skip any initialization

        :return: initialized job database.

        .. versionadded:: 0.33.0
        """
        # TODO: option to provide custom MultiBackendJobManager subclass with custom normalize?
        if self.exists():
            if on_exists == "skip":
                return self
            if on_exists == "error":
                raise FileExistsError(f"Job database {self!r} already exists.")
            # TODO handle other on_exists modes: e.g. overwrite, merge, ...
            raise ValueError(f"Invalid on_exists={on_exists!r}")

        self.persist(MultiBackendJobManager._normalize_df(df))
        # Return self to allow chaining with constructor.
        return self

    @abc.abstractmethod
    def read(self) -> pd.DataFrame:
        """
        Read job data from the database as pandas DataFrame.

        :return: loaded job data.
        """

    @property
    def df(self) -> pd.DataFrame:
        """The full job data as dataframe (read from the database on first access)."""
        if self._df is None:
            self._df = self.read()
        return self._df

    def count_by_status(self, statuses: Iterable[str] = ()) -> dict:
        """
        Retrieve the number of jobs per status.

        :param statuses: List/set of statuses to include. If empty, all statuses are included.
        :return: dictionary with status as key and the count as value.
        """
        histogram = self.df.groupby("status").size().to_dict()
        wanted = set(statuses)
        if not wanted:
            return histogram
        return {status: count for status, count in histogram.items() if status in wanted}

    def get_by_status(self, statuses, max=None) -> pd.DataFrame:
        """
        Returns a dataframe with jobs, filtered by status.

        :param statuses: List of statuses to include.
        :param max: Maximum number of jobs to return.

        :return: DataFrame with jobs filtered by status.
        """
        jobs = self.df
        selected = jobs[jobs.status.isin(statuses)]
        if max is None:
            return selected
        return selected.head(max)

    def _merge_into_df(self, df: pd.DataFrame):
        """Merge given (possibly partial) job data into the in-memory dataframe."""
        if self._df is None:
            self._df = df
        else:
            self._df.update(df, overwrite=True)
class CsvJobDatabase(FullDataFrameJobDatabase):
    """
    Persist/load job metadata with a CSV file.

    :implements: :py:class:`JobDatabaseInterface`
    :param path: Path to local CSV file.

    .. note::
        Support for GeoPandas dataframes depends on the ``geopandas`` package
        as :ref:`optional dependency <installation-optional-dependencies>`.

    .. versionadded:: 0.31.0
    """

    def __init__(self, path: Union[str, Path]):
        super().__init__()
        self.path = Path(path)

    def __repr__(self):
        return f"{self.__class__.__name__}({str(self.path)!r})"

    def exists(self) -> bool:
        """Does the CSV file exist?"""
        return self.path.exists()

    def _is_valid_wkt(self, wkt: str) -> bool:
        """Check whether the given value is a string that can be parsed as WKT."""
        if not isinstance(wkt, str):
            # E.g. a NaN float coming from an empty CSV cell: not WKT, no need to try parsing it.
            return False
        try:
            shapely.wkt.loads(wkt)
            return True
        except shapely.errors.WKTReadingError:
            return False

    def read(self) -> pd.DataFrame:
        """
        Load the job data from the CSV file.

        When there is a "geometry" column of which the first value is valid WKT,
        the result is converted to a GeoPandas dataframe.
        """
        df = pd.read_csv(
            self.path,
            # TODO: possible to avoid hidden coupling with MultiBackendJobManager here?
            dtype={c: r.dtype for (c, r) in MultiBackendJobManager._COLUMN_REQUIREMENTS.items()},
        )
        if (
            "geometry" in df.columns
            and df["geometry"].dtype.name != "geometry"
            # Guard against a table without rows: there is no first geometry to inspect.
            and len(df) > 0
            and self._is_valid_wkt(df["geometry"].iloc[0])
        ):
            import geopandas

            # `df.to_csv()` in `persist()` has encoded geometries as WKT, so we decode that here.
            df = geopandas.GeoDataFrame(df, geometry=geopandas.GeoSeries.from_wkt(df["geometry"]))
        return df

    def persist(self, df: pd.DataFrame):
        """Merge the given job data into the in-memory dataframe and write the whole dataframe to the CSV file."""
        self._merge_into_df(df)
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.df.to_csv(self.path, index=False)
+ + + +
class ParquetJobDatabase(FullDataFrameJobDatabase):
    """
    Persist/load job metadata with a Parquet file.

    :implements: :py:class:`JobDatabaseInterface`
    :param path: Path to the Parquet file.

    .. note::
        Support for Parquet files depends on the ``pyarrow`` package
        as :ref:`optional dependency <installation-optional-dependencies>`.

        Support for GeoPandas dataframes depends on the ``geopandas`` package
        as :ref:`optional dependency <installation-optional-dependencies>`.

    .. versionadded:: 0.31.0
    """

    def __init__(self, path: Union[str, Path]):
        super().__init__()
        self.path = Path(path)

    def __repr__(self):
        return f"{self.__class__.__name__}({str(self.path)!r})"

    def exists(self) -> bool:
        """Does the Parquet file exist?"""
        return self.path.exists()

    def read(self) -> pd.DataFrame:
        """
        Load the job data from the Parquet file,
        as GeoPandas dataframe when the file has "geo" metadata, as plain pandas dataframe otherwise.
        """
        # Unfortunately, a naive `pandas.read_parquet()` does not easily allow
        # reconstructing geometries from a GeoPandas Parquet file.
        # And vice-versa, `geopandas.read_parquet()` does not support reading
        # Parquet file without geometries.
        # So we have to guess which case we have.
        # TODO is there a cleaner way to do this?
        import pyarrow.parquet

        metadata = pyarrow.parquet.read_metadata(self.path)
        # The key-value metadata can be None (file without any key-value metadata).
        key_value_metadata = metadata.metadata or {}
        if b"geo" in key_value_metadata:
            import geopandas

            return geopandas.read_parquet(self.path)
        else:
            return pd.read_parquet(self.path)

    def persist(self, df: pd.DataFrame):
        """Merge the given job data into the in-memory dataframe and write the whole dataframe to the Parquet file."""
        self._merge_into_df(df)
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.df.to_parquet(self.path, index=False)
def get_job_db(path: Union[str, Path]) -> JobDatabaseInterface:
    """
    Factory to get a job database at a given path,
    guessing the database type from filename extension
    (case-insensitive): ".csv" for CSV, ".parquet" or ".geoparquet" for Parquet.

    :param path: path to job database file.
    :raises ValueError: when the database type can not be guessed from the extension.

    .. versionadded:: 0.33.0
    """
    path = Path(path)
    suffix = path.suffix.lower()
    if suffix in {".csv"}:
        return CsvJobDatabase(path=path)
    if suffix in {".parquet", ".geoparquet"}:
        return ParquetJobDatabase(path=path)
    raise ValueError(f"Could not guess job database type from {path!r}")


def create_job_db(path: Union[str, Path], df: pd.DataFrame, *, on_exists: str = "error"):
    """
    Factory to create a job database at given path,
    initialized from a given dataframe,
    and its database type guessed from filename extension.

    :param path: Path to the job database file.
    :param df: DataFrame to store in the job database.
    :param on_exists: What to do when the job database already exists:
        - "error": (default) raise an exception
        - "skip": work with existing database, ignore given dataframe and skip any initialization

    :return: the initialized job database.

    .. versionadded:: 0.33.0
    """
    job_db = get_job_db(path)
    if not isinstance(job_db, FullDataFrameJobDatabase):
        raise NotImplementedError(f"Initialization of {type(job_db)} is not supported.")
    job_db.initialize_from_df(df=df, on_exists=on_exists)
    return job_db
class ProcessBasedJobCreator:
    """
    Batch job creator (for use with :py:class:`MultiBackendJobManager`)
    driven by a parameterized openEO process definition,
    such as a user-defined process (UDP) or a remote openEO process definition.

    For each row of the dataframe managed by the :py:class:`MultiBackendJobManager`,
    a batch job is created in which the process parameters
    are filled in with the corresponding values of that row.

    .. seealso::
        See :ref:`job-management-with-process-based-job-creator`
        for more information and examples.

    By default, a process parameter is linked to the dataframe column with the same name.
    Some overrides and fallbacks are available on top of that:

    -   If ``parameter_column_map`` is given, it is consulted
        to translate a process parameter name (dictionary key)
        to the dataframe column name to use (dictionary value).
    -   If ``parameter_column_map`` is not given (or ``None``),
        one common case is handled automatically:
        when there is a *single parameter* that accepts inline GeoJSON geometries
        and the dataframe is a GeoPandas dataframe with a *single geometry* column,
        that parameter and that column are linked.
    -   If no column is found for a parameter, a default value is used:
        first from ``parameter_defaults`` (if given),
        then from the parameter schema in the process definition.
    -   If there is still no value and the parameter is not flagged as optional,
        an error is raised.

    :param process_id: (optional) openEO process identifier.
        Can be omitted when working with a remote process definition
        that is fully defined with a URL in the ``namespace`` parameter.
    :param namespace: (optional) openEO process namespace.
        Typically used to provide a URL to a remote process definition.
    :param parameter_defaults: (optional) default values for process parameters,
        to be used when not available in the dataframe managed by
        :py:class:`MultiBackendJobManager`.
    :param parameter_column_map: Optional overrides
        for linking process parameters to dataframe columns:
        mapping of process parameter names as key
        to dataframe column names as value.

    .. versionadded:: 0.33.0

    .. warning::
        This is an experimental API subject to change,
        and we greatly welcome
        `feedback and suggestions for improvement <https://github.com/Open-EO/openeo-python-client/issues>`_.

    """

    def __init__(
        self,
        *,
        process_id: Optional[str] = None,
        namespace: Union[str, None] = None,
        parameter_defaults: Optional[dict] = None,
        parameter_column_map: Optional[dict] = None,
    ):
        if namespace is None and process_id is None:
            raise ValueError("At least one of `process_id` and `namespace` should be provided.")
        self._process_id = process_id
        self._namespace = namespace
        self._parameter_defaults = parameter_defaults or {}
        self._parameter_column_map = parameter_column_map
        self._cache = LazyLoadCache()

    def _get_process_definition(self, connection: Connection) -> Process:
        """Get the process definition, from the backend (UDP) or from a remote URL (cached)."""
        if self._namespace is None:
            # Handling of a user-specific UDP
            udp = connection.user_defined_process(self._process_id)
            return Process.from_dict(udp.describe())

        if isinstance(self._namespace, str) and re.match("https?://", self._namespace):
            # Remote process definition handling
            cache_key = ("remote_process_definition", self._namespace, self._process_id)
            return self._cache.get(
                key=cache_key,
                load=lambda: parse_remote_process_definition(namespace=self._namespace, process_id=self._process_id),
            )

        raise NotImplementedError(
            f"Unsupported process definition source udp_id={self._process_id!r} namespace={self._namespace!r}"
        )

    @staticmethod
    def _to_json_compatible(value):
        """Convert numpy scalars and shapely geometries to JSON-encodable values (others pass through)."""
        if isinstance(value, numpy.integer):
            return int(value)
        if isinstance(value, numpy.number):
            return float(value)
        if isinstance(value, shapely.geometry.base.BaseGeometry):
            return shapely.geometry.mapping(value)
        return value

    def start_job(self, row: pd.Series, connection: Connection, **_) -> BatchJob:
        """
        Implementation of the ``start_job`` callable interface
        of :py:meth:`MultiBackendJobManager.run_jobs`
        to create a job based on given dataframe row

        :param row: The row in the pandas dataframe that stores the jobs state and other tracked data.
        :param connection: The connection to the backend.
        :raises ValueError: when a non-optional parameter has no value in the row and no default.
        """
        # TODO: refactor out some methods, for better reuse and decoupling:
        #       `get_arguments()` (to build the arguments dictionary), `get_cube()` (to create the cube),

        definition = self._get_process_definition(connection=connection)
        process_id = definition.id
        parameters = definition.parameters or []

        if self._parameter_column_map is None:
            # No explicit mapping given: guess it from the first row we see and keep it.
            self._parameter_column_map = self._guess_parameter_column_map(parameters=parameters, row=row)

        arguments = {}
        for parameter in parameters:
            name = parameter.name
            column = self._parameter_column_map.get(name, name)
            if column in row.index:
                # Get value from dataframe row
                raw_value = row.loc[column]
            elif name in self._parameter_defaults:
                # Fallback on default values from constructor
                raw_value = self._parameter_defaults[name]
            elif parameter.has_default():
                # Explicitly use default value from parameter schema
                raw_value = parameter.default
            elif parameter.optional:
                # Skip optional parameters without any fallback default value
                continue
            else:
                raise ValueError(f"Missing required parameter {name!r} for process {process_id!r}")

            # Prepare some values/dtypes for JSON encoding
            arguments[name] = self._to_json_compatible(raw_value)

        cube = connection.datacube_from_process(process_id=process_id, namespace=self._namespace, **arguments)

        # Title/description columns of the row take precedence over the generated texts.
        default_title = f"Process {process_id!r} with {repr_truncate(arguments)}"
        default_description = f"Process {process_id!r} (namespace {self._namespace}) with {arguments}"
        return connection.create_job(
            cube,
            title=row.get("title", default_title),
            description=row.get("description", default_description),
        )

    def __call__(self, *arg, **kwargs) -> BatchJob:
        """Syntactic sugar for calling :py:meth:`start_job`."""
        return self.start_job(*arg, **kwargs)

    @staticmethod
    def _guess_parameter_column_map(parameters: List[Parameter], row: pd.Series) -> dict:
        """
        Guess parameter-column mapping from given parameter list and dataframe row

        Only geometries are considered: parameters that accept GeoJSON
        are linked to the columns of the row that hold a shapely geometry.

        :raises RuntimeError: when geometry parameters and geometry columns can not be paired up.
        """
        guessed = {}
        # Geometry based mapping: try to automatically map geometry columns to geojson parameters
        geojson_params = [p.name for p in parameters if p.schema.accepts_geojson()]
        geometry_cols = [
            label for (label, cell) in row.items() if isinstance(cell, shapely.geometry.base.BaseGeometry)
        ]
        if geojson_params and geometry_cols:
            if len(geojson_params) == 1 and len(geometry_cols) == 1:
                # Most common case: one geometry parameter and one geometry column: can be mapped naively
                (only_param,) = geojson_params
                (only_column,) = geometry_cols
                guessed[only_param] = only_column
            elif all(p in geometry_cols for p in geojson_params):
                # Each geometry param has geometry column with same name: easy to map
                for p in geojson_params:
                    guessed[p] = p
            else:
                raise RuntimeError(
                    f"Problem with mapping geometry columns ({geometry_cols}) to process parameters ({geojson_params})"
                )
        _log.debug(f"Guessed parameter-column map: {guessed}")
        return guessed
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/extra/spectral_indices/spectral_indices.html b/_modules/openeo/extra/spectral_indices/spectral_indices.html new file mode 100644 index 000000000..509a018d4 --- /dev/null +++ b/_modules/openeo/extra/spectral_indices/spectral_indices.html @@ -0,0 +1,620 @@ + + + + + + + openeo.extra.spectral_indices.spectral_indices — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.extra.spectral_indices.spectral_indices

+import functools
+import json
+import re
+from typing import Dict, List, Optional, Set
+
+from openeo import BaseOpenEoException
+from openeo.processes import ProcessBuilder, array_create, array_modify
+from openeo.rest.datacube import DataCube
+
+try:
+    import importlib_resources
+except ImportError:
+    import importlib.resources as importlib_resources
+
+
@functools.lru_cache(maxsize=1)
def load_indices() -> Dict[str, dict]:
    """
    Load set of supported spectral indices.

    Merges the ``"SpectralIndices"`` sections of the JSON resource files
    bundled in the ``openeo.extra.spectral_indices`` package.
    The result is cached: every call returns the same dictionary object,
    so callers should treat it as read-only.

    :return: mapping of index name to its specification
    :raises RuntimeError: when an index name is defined in more than one resource file
    """
    # TODO: encapsulate all this json loading in a single Awesome Spectral Indices registry class?
    specs = {}
    package_files = importlib_resources.files("openeo.extra.spectral_indices")

    for path in [
        "resources/awesome-spectral-indices/spectral-indices-dict.json",
        # TODO #506 Deprecate extra-indices-dict.json as a whole
        #      and provide an alternative mechanism to work with custom indices
        "resources/extra-indices-dict.json",
    ]:
        # Read straight from the traversable resource: it is not a context manager
        # (`pathlib.Path` lost its no-op context manager support in Python 3.13).
        data = json.loads((package_files / path).read_text(encoding="utf8"))
        index_specs = data["SpectralIndices"]
        overwrites = set(specs).intersection(index_specs)
        if overwrites:
            raise RuntimeError(f"Duplicate spectral indices: {overwrites} from {path}")
        specs.update(index_specs)

    return specs
+
+
@functools.lru_cache(maxsize=1)
def load_constants() -> Dict[str, float]:
    """
    Load constants defined by Awesome Spectral Indices.

    Reads the bundled ``constants.json`` resource and keeps, per constant,
    its ``"default"`` value when that is a number (``int`` or ``float``).
    The result is cached: every call returns the same dictionary object.

    :return: mapping of constant name to its numeric default value
    """
    # TODO: encapsulate all this json loading in a single Awesome Spectral Indices registry class?
    package_files = importlib_resources.files("openeo.extra.spectral_indices")
    # Read straight from the traversable resource: it is not a context manager
    # (`pathlib.Path` lost its no-op context manager support in Python 3.13).
    resource = package_files / "resources/awesome-spectral-indices/constants.json"
    data = json.loads(resource.read_text(encoding="utf8"))

    return {k: v["default"] for k, v in data.items() if isinstance(v["default"], (int, float))}
+
+
@functools.lru_cache(maxsize=1)
def _load_bands() -> Dict[str, dict]:
    """
    Load band name mapping defined by Awesome Spectral Indices.

    Returns the parsed content of the bundled ``bands.json`` resource as-is.
    The result is cached: every call returns the same object.
    """
    # TODO: encapsulate all this json loading in a single Awesome Spectral Indices registry class?
    package_files = importlib_resources.files("openeo.extra.spectral_indices")
    # Read straight from the traversable resource: it is not a context manager
    # (`pathlib.Path` lost its no-op context manager support in Python 3.13).
    resource = package_files / "resources/awesome-spectral-indices/bands.json"
    return json.loads(resource.read_text(encoding="utf8"))
+
+
class BandMappingException(BaseOpenEoException):
    """
    Failure to determine band-variable mapping,
    e.g. because the satellite platform could not be determined
    or no bands are known for it.
    """
+
+
+class _BandMapping:
+    """
+    Helper class to extract mappings between band names and variable names used in Awesome Spectral Indices formulas.
+    """
+
+    _EXTRA = {
+        "sentinel1": {"HH": "HH", "HV": "HV", "VH": "VH", "VV": "VV"},
+    }
+
+    def __init__(self):
+        # Load bands.json from Awesome Spectral Indices
+        self._band_data = _load_bands()
+
+    @staticmethod
+    def _normalize_platform(platform: str) -> str:
+        platform = platform.lower().replace("-", "").replace(" ", "")
+        if platform in {"sentinel2a", "sentinel2b"}:
+            platform = "sentinel2"
+        return platform
+
+    @staticmethod
+    def _normalize_band_name(band_name: str) -> str:
+        band_name = band_name.upper()
+        # Normalize band names like "B01" to "B1"
+        band_name = re.sub(r"^B0+(\d+)$", r"B\1", band_name)
+        return band_name
+
+    @functools.lru_cache(maxsize=1)
+    def get_platforms(self) -> Set[str]:
+        """Get list of supported (normalized) satellite platforms."""
+        platforms = {p for var_data in self._band_data.values() for p in var_data.get("platforms", {}).keys()}
+        platforms.update(self._EXTRA.keys())
+        platforms.update({self._normalize_platform(p) for p in platforms})
+        return platforms
+
+    def guess_platform(self, name: str) -> str:
+        """Guess platform from given collection id or name."""
+        # First check original id, then retry with removed separators as last resort.
+        for haystack in [name.lower(), re.sub("[_ -]", "", name.lower())]:
+            for platform in sorted(self.get_platforms(), key=len, reverse=True):
+                if platform in haystack:
+                    return platform
+        raise BandMappingException(f"Unable to guess satellite platform from id {name!r}.")
+
+    def variable_to_band_name_map(self, platform: str) -> Dict[str, str]:
+        """
+        Build mapping from Awesome Spectral Indices variable names to (normalized) band names for given satellite platform.
+        """
+        platform_normalized = self._normalize_platform(platform)
+        if platform_normalized in self._EXTRA:
+            return self._EXTRA[platform_normalized]
+
+        var_to_band = {
+            var: pf_data["band"]
+            for var, var_data in self._band_data.items()
+            for pf, pf_data in var_data.get("platforms", {}).items()
+            if self._normalize_platform(pf) == platform_normalized
+        }
+        if not var_to_band:
+            raise BandMappingException(f"Empty band mapping derived for satellite platform {platform!r}")
+        return var_to_band
+
+    def actual_band_name_to_variable_map(self, platform: str, band_names: List[str]) -> Dict[str, str]:
+        """Build mapping from actual band names (as given) to Awesome Spectral Indices variable names."""
+        var_to_band = self.variable_to_band_name_map(platform=platform)
+        band_to_var = {
+            band_name: var
+            for var, normalized_band_name in var_to_band.items()
+            for band_name in band_names
+            if self._normalize_band_name(band_name) == normalized_band_name
+        }
+        return band_to_var
+
+
+
def list_indices() -> List[str]:
    """Get the names of all supported spectral indices, as a list."""
    return list(load_indices())
+ + + +def _check_params(item, params): + range_vals = ["input_range", "output_range"] + if set(params) != set(range_vals): + raise ValueError( + f"You have set the parameters {params} on {item}, while the following are required {range_vals}" + ) + for rng in range_vals: + if params[rng] is None: + continue + if len(params[rng]) != 2: + raise ValueError( + f"The list of provided values {params[rng]} for parameter {rng} for {item} is not of length 2" + ) + # TODO: allow float too? + if not all(isinstance(val, int) for val in params[rng]): + raise ValueError("The ranges you supplied are not all of type int") + if (params["input_range"] is None) != (params["output_range"] is None): + raise ValueError(f"The index_range and output_range of {item} should either be both supplied, or both None") + + +def _check_validity_index_dict(index_dict: dict, index_specs: dict): + # TODO: this `index_dict` API needs some more rethinking: + # - the dictionary has no explicit order of indices, which can be important for end user + # - allow "collection" to be missing (e.g. if no rescaling is desired, or input data is not kept)? + # - option to define default output range, instead of having it to specify it for each index? + # - keep "rescaling" feature separate/orthogonal from "spectral indices" feature. 
It could be useful as + # a more generic machine learning data preparation feature + input_vals = ["collection", "indices"] + if set(index_dict.keys()) != set(input_vals): + raise ValueError( + f"The first level of the dictionary should contain the keys 'collection' and 'indices', but they contain {index_dict.keys()}" + ) + _check_params("collection", index_dict["collection"]) + for index, params in index_dict["indices"].items(): + if index not in index_specs.keys(): + raise NotImplementedError("Index " + index + " is not supported.") + _check_params(index, params) + + +def _callback( + x: ProcessBuilder, + index_dict: dict, + index_specs: dict, + append: bool, + band_names: List[str], + band_to_var: Dict[str, str], +) -> ProcessBuilder: + index_values = [] + x_res = x + + # TODO: use `label` parameter of `array_element` to avoid index based band references + variables = {band_to_var[bn]: x.array_element(i) for i, bn in enumerate(band_names) if bn in band_to_var} + eval_globals = { + **load_constants(), + **variables, + } + # TODO: user might want to control order of indices, which is tricky through a dictionary. + for index, params in index_dict["indices"].items(): + index_result = eval(index_specs[index]["formula"], eval_globals) + if params["input_range"] is not None: + index_result = index_result.linear_scale_range(*params["input_range"], *params["output_range"]) + index_values.append(index_result) + if index_dict["collection"]["input_range"] is not None: + x_res = x_res.linear_scale_range( + *index_dict["collection"]["input_range"], *index_dict["collection"]["output_range"] + ) + if append: + return array_modify(data=x_res, values=index_values, index=len(band_names)) + else: + return array_create(data=index_values) + + +
def compute_and_rescale_indices(
    datacube: DataCube,
    index_dict: dict,
    *,
    append: bool = False,
    variable_map: Optional[Dict[str, str]] = None,
    platform: Optional[str] = None,
) -> DataCube:
    """
    Compute spectral indices from a data cube, with optional rescaling of the input bands and the indices.

    :param datacube: input data cube
    :param index_dict: dictionary with exactly two entries:
        ``"collection"``, with the input and output range to rescale the bands of the given data cube,
        and ``"indices"``, with per index to compute its own input and output range.
        For example::

            {
                "collection": {
                    "input_range": [0,8000],
                    "output_range": [0,250]
                },
                "indices": {
                    "NDVI": {
                        "input_range": [-1,1],
                        "output_range": [0,250]
                    },
                }
            }

        Set both ranges of an entry to ``None`` to skip the rescaling for that entry.

        See `list_indices()` for supported indices.

    :param append: append the indices as bands to the given data cube
        instead of creating a new cube with only the calculated indices
    :param variable_map: (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names.
        To be specified if the given data cube has non-standard band names,
        or the satellite platform can not be recognized from the data cube metadata.
        See :ref:`spectral_indices_manual_band_mapping` for more information.
    :param platform: optionally specify the satellite platform (to determine band name mapping)
        if the given data cube has no or an unhandled collection id in its metadata.
        See :ref:`spectral_indices_manual_band_mapping` for more information.

    :return: the datacube with the indices attached as bands

    .. warning:: this "rescaled" index helper uses an experimental API (e.g. `index_dict` argument) that is subject to change.

    .. versionadded:: 0.26.0
        Added `variable_map` and `platform` arguments.

    """
    index_specs = load_indices()

    _check_validity_index_dict(index_dict, index_specs)

    if variable_map is not None:
        # Explicit mapping given as variable -> band name: invert it.
        band_to_var = {band: var for var, band in variable_map.items()}
    else:
        # Automatic band mapping
        band_mapping = _BandMapping()
        if platform is None:
            collection_id = datacube.metadata.get("id") if datacube.metadata else None
            if not collection_id:
                raise BandMappingException("Unable to determine satellite platform from data cube metadata")
            platform = band_mapping.guess_platform(name=collection_id)
        band_to_var = band_mapping.actual_band_name_to_variable_map(
            platform=platform, band_names=datacube.metadata.band_names
        )

    def _compute(x):
        return _callback(
            x,
            index_dict=index_dict,
            index_specs=index_specs,
            append=append,
            band_names=datacube.metadata.band_names,
            band_to_var=band_to_var,
        )

    res = datacube.apply_dimension(dimension="bands", process=_compute)

    # Label the resulting bands: the index names, preceded by the original band names when appending.
    index_names = list(index_dict["indices"].keys())
    if append:
        return res.rename_labels("bands", target=datacube.metadata.band_names + index_names)
    return res.rename_labels("bands", target=index_names)
+ + + +
def append_and_rescale_indices(
    datacube: DataCube,
    index_dict: dict,
    *,
    variable_map: Optional[Dict[str, str]] = None,
    platform: Optional[str] = None,
) -> DataCube:
    """
    Compute spectral indices from a data cube (with optional rescaling)
    and append them as bands to that data cube.

    Shortcut for :py:func:`compute_and_rescale_indices` with ``append=True``.

    :param datacube: input data cube
    :param index_dict: dictionary with exactly two entries:
        ``"collection"``, with the input and output range to rescale the bands of the given data cube,
        and ``"indices"``, with per index to compute its own input and output range.
        For example::

            {
                "collection": {
                    "input_range": [0,8000],
                    "output_range": [0,250]
                },
                "indices": {
                    "NDVI": {
                        "input_range": [-1,1],
                        "output_range": [0,250]
                    },
                }
            }

        See `list_indices()` for supported indices.

    :param variable_map: (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names.
        To be specified if the given data cube has non-standard band names,
        or the satellite platform can not be recognized from the data cube metadata.
        See :ref:`spectral_indices_manual_band_mapping` for more information.
    :param platform: optionally specify the satellite platform (to determine band name mapping)
        if the given data cube has no or an unhandled collection id in its metadata.
        See :ref:`spectral_indices_manual_band_mapping` for more information.

    :return: data cube with appended indices

    .. warning:: this "rescaled" index helper uses an experimental API (e.g. `index_dict` argument) that is subject to change.

    .. versionadded:: 0.26.0
        Added `variable_map` and `platform` arguments.
    """
    result = compute_and_rescale_indices(
        datacube=datacube,
        index_dict=index_dict,
        append=True,
        variable_map=variable_map,
        platform=platform,
    )
    return result
+ + + +
def compute_indices(
    datacube: DataCube,
    indices: List[str],
    *,
    append: bool = False,
    variable_map: Optional[Dict[str, str]] = None,
    platform: Optional[str] = None,
) -> DataCube:
    """
    Compute multiple spectral indices from the given data cube, without any rescaling.

    :param datacube: input data cube
    :param indices: list of names of the indices to compute and append. See `list_indices()` for supported indices.
    :param append: append the indices as bands to the given data cube
        instead of creating a new cube with only the calculated indices
    :param variable_map: (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names.
        To be specified if the given data cube has non-standard band names,
        or the satellite platform can not be recognized from the data cube metadata.
        See :ref:`spectral_indices_manual_band_mapping` for more information.
    :param platform: optionally specify the satellite platform (to determine band name mapping)
        if the given data cube has no or an unhandled collection id in its metadata.
        See :ref:`spectral_indices_manual_band_mapping` for more information.

    :return: data cube containing the indices as bands

    .. versionadded:: 0.26.0
        Added `variable_map` and `platform` arguments.
    """
    # TODO: it's bit weird to have to specify all these None's in this structure
    no_rescaling = {"input_range": None, "output_range": None}
    index_dict = {
        "collection": dict(no_rescaling),
        "indices": {index: dict(no_rescaling) for index in indices},
    }
    return compute_and_rescale_indices(
        datacube=datacube,
        index_dict=index_dict,
        append=append,
        variable_map=variable_map,
        platform=platform,
    )
+ + + +
def append_indices(
    datacube: DataCube,
    indices: List[str],
    *,
    variable_map: Optional[Dict[str, str]] = None,
    platform: Optional[str] = None,
) -> DataCube:
    """
    Compute multiple spectral indices and append them to the given data cube.

    Shortcut for :py:func:`compute_indices` with ``append=True``.

    :param datacube: input data cube
    :param indices: list of names of the indices to compute and append. See `list_indices()` for supported indices.
    :param variable_map: (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names.
        To be specified if the given data cube has non-standard band names,
        or the satellite platform can not be recognized from the data cube metadata.
        See :ref:`spectral_indices_manual_band_mapping` for more information.
    :param platform: optionally specify the satellite platform (to determine band name mapping)
        if the given data cube has no or an unhandled collection id in its metadata.
        See :ref:`spectral_indices_manual_band_mapping` for more information.

    :return: data cube with appended indices

    .. versionadded:: 0.26.0
        Added `variable_map` and `platform` arguments.
    """
    result = compute_indices(
        datacube=datacube,
        indices=indices,
        append=True,
        variable_map=variable_map,
        platform=platform,
    )
    return result
+ + + +
def compute_index(
    datacube: DataCube, index: str, *, variable_map: Optional[Dict[str, str]] = None, platform: Optional[str] = None
) -> DataCube:
    """
    Compute a single spectral index from a data cube.

    Shortcut for :py:func:`compute_indices` with a single index and ``append=False``.

    :param datacube: input data cube
    :param index: name of the index to compute. See `list_indices()` for supported indices.
    :param variable_map: (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names.
        To be specified if the given data cube has non-standard band names,
        or the satellite platform can not be recognized from the data cube metadata.
        See :ref:`spectral_indices_manual_band_mapping` for more information.
    :param platform: optionally specify the satellite platform (to determine band name mapping)
        if the given data cube has no or an unhandled collection id in its metadata.
        See :ref:`spectral_indices_manual_band_mapping` for more information.

    :return: data cube containing the index as band

    .. versionadded:: 0.26.0
        Added `variable_map` and `platform` arguments.
    """
    # TODO: option to compute the index with `reduce_dimension` instead of `apply_dimension`?
    single_index = [index]
    return compute_indices(
        datacube=datacube,
        indices=single_index,
        append=False,
        variable_map=variable_map,
        platform=platform,
    )
+ + + +
def append_index(
    datacube: DataCube, index: str, *, variable_map: Optional[Dict[str, str]] = None, platform: Optional[str] = None
) -> DataCube:
    """
    Compute a single spectral index and append it to the given data cube.

    Shortcut for :py:func:`compute_indices` with a single index and ``append=True``.

    :param datacube: input data cube
    :param index: name of the index to compute and append. See `list_indices()` for supported indices.
    :param variable_map: (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names.
        To be specified if the given data cube has non-standard band names,
        or the satellite platform can not be recognized from the data cube metadata.
        See :ref:`spectral_indices_manual_band_mapping` for more information.
    :param platform: optionally specify the satellite platform (to determine band name mapping)
        if the given data cube has no or an unhandled collection id in its metadata.
        See :ref:`spectral_indices_manual_band_mapping` for more information.

    :return: data cube with appended index

    .. versionadded:: 0.26.0
        Added `variable_map` and `platform` arguments.
    """
    return compute_indices(
        datacube=datacube, indices=[index], append=True, variable_map=variable_map, platform=platform
    )
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/internal/graph_building.html b/_modules/openeo/internal/graph_building.html new file mode 100644 index 000000000..4ad187ef0 --- /dev/null +++ b/_modules/openeo/internal/graph_building.html @@ -0,0 +1,630 @@ + + + + + + + openeo.internal.graph_building — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.internal.graph_building

+"""
+Internal openEO process graph building utilities
+''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Internal functionality for abstracting, building, manipulating and processing openEO process graphs.
+
+"""
+
+from __future__ import annotations
+
+import abc
+import collections
+import copy
+import json
+import sys
+from contextlib import nullcontext
+from pathlib import Path
+from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union
+
+from openeo.api.process import Parameter
+from openeo.internal.process_graph_visitor import (
+    ProcessGraphUnflattener,
+    ProcessGraphVisitException,
+    ProcessGraphVisitor,
+)
+from openeo.util import dict_no_none, load_json_resource
+
+
+
class FlatGraphableMixin(metaclass=abc.ABCMeta):
    """
    Mixin for classes that can be exported/converted to
    a "flat graph" representation of an openEO process graph.
    """

    @abc.abstractmethod
    def flat_graph(self) -> Dict[str, dict]:
        """Get the process graph in flat dict representation (to be implemented by subclasses)."""
        ...

    def to_json(self, *, indent: Union[int, None] = 2, separators: Optional[Tuple[str, str]] = None) -> str:
        """
        Get interoperable JSON representation of the process graph.

        See :py:meth:`DataCube.print_json` to directly print the JSON representation
        and :ref:`process_graph_export` for more usage information.

        Also see ``json.dumps`` docs for more information on the JSON formatting options.

        :param indent: JSON indentation level.
        :param separators: (optional) tuple of item/key separators.
        :return: JSON string
        """
        pg = {"process_graph": self.flat_graph()}
        return json.dumps(pg, indent=indent, separators=separators)

    def print_json(
        self,
        *,
        file=None,
        indent: Union[int, None] = 2,
        separators: Optional[Tuple[str, str]] = None,
        end: str = "\n",
    ):
        """
        Print interoperable JSON representation of the process graph.

        See :py:meth:`DataCube.to_json` to get the JSON representation as a string
        and :ref:`process_graph_export` for more usage information.

        Also see ``json.dumps`` docs for more information on the JSON formatting options.

        :param file: file-like object (stream) to print to (current ``sys.stdout`` by default).
            Or a path (string or pathlib.Path) to a file to write to.
        :param indent: JSON indentation level.
        :param separators: (optional) tuple of item/key separators.
        :param end: additional string to be printed at the end (newline by default).

        .. versionadded:: 0.12.0

        .. versionadded:: 0.23.0
            added the ``end`` argument.
        """
        pg = {"process_graph": self.flat_graph()}
        if isinstance(file, (str, Path)):
            # Create (new) file and automatically close it
            file_ctx = Path(file).open("w", encoding="utf8")
        else:
            # Just use file as-is, but don't close it automatically.
            # Only fall back on stdout when no file is given at all:
            # a truthiness check would also discard valid streams that happen to be falsy.
            file_ctx = nullcontext(enter_result=sys.stdout if file is None else file)
        with file_ctx as f:
            json.dump(pg, f, indent=indent, separators=separators)
            if end:
                f.write(end)
+
+ + + +class _FromNodeMixin(abc.ABC): + """Mixin for classes that want to hook into the generation of a "from_node" reference.""" + + @abc.abstractmethod + def from_node(self) -> PGNode: + # TODO: "from_node" is a bit a confusing name: + # it refers to the "from_node" node reference in openEO process graphs, + # but as a method name here it reads like "construct from PGNode", + # while it is actually meant as "export as PGNode" (that can be used in a "from_node" reference). + pass + + +
class PGNode(_FromNodeMixin, FlatGraphableMixin):
    """
    A process node in a process graph: has at least a process_id and arguments.

    Note that a full openEO "process graph" is essentially a directed acyclic graph of nodes
    pointing to each other. A full process graph is practically equivalent with its "result" node,
    as it points (directly or indirectly) to all the other nodes it depends on.

    .. warning::
        This class is an implementation detail meant for internal use.
        It is not recommended for general use in normal user code.
        Instead, use process graph abstraction builders like
        :py:meth:`Connection.load_collection() <openeo.rest.connection.Connection.load_collection>`,
        :py:meth:`Connection.datacube_from_process() <openeo.rest.connection.Connection.datacube_from_process>`,
        :py:meth:`Connection.datacube_from_flat_graph() <openeo.rest.connection.Connection.datacube_from_flat_graph>`,
        :py:meth:`Connection.datacube_from_json() <openeo.rest.connection.Connection.datacube_from_json>`,
        :py:meth:`Connection.load_ml_model() <openeo.rest.connection.Connection.load_ml_model>`,
        :py:func:`openeo.processes.process()`.

    """

    __slots__ = ["_process_id", "_arguments", "_namespace"]

    def __init__(self, process_id: str, arguments: dict = None, namespace: Union[str, None] = None, **kwargs):
        """
        :param process_id: id of the process this node represents
        :param arguments: process arguments (optional), merged with ``kwargs``
        :param namespace: optional process namespace
        :param kwargs: additional process arguments, given as keyword arguments
        """
        self._process_id = process_id
        # Merge arguments dict and kwargs (into a fresh dict, the given `arguments` dict is left untouched)
        arguments = dict(**(arguments or {}), **kwargs)
        # Make sure direct PGNode arguments are properly wrapped in a "from_node" dict
        for arg, value in arguments.items():
            if isinstance(value, _FromNodeMixin):
                arguments[arg] = {"from_node": value.from_node()}
            elif isinstance(value, list):
                # Build a new list instead of rewriting elements in place:
                # the list object is owned by the caller and should not be modified here.
                arguments[arg] = [
                    {"from_node": element.from_node()} if isinstance(element, _FromNodeMixin) else element
                    for element in value
                ]
        # TODO: use a frozendict of some sort to ensure immutability?
        self._arguments = arguments
        self._namespace = namespace

    def from_node(self):
        """A PGNode is its own "from_node" export."""
        return self

    def __repr__(self):
        return "<{c} {p!r} at 0x{m:x}>".format(c=self.__class__.__name__, p=self.process_id, m=id(self))

    @property
    def process_id(self) -> str:
        return self._process_id

    @property
    def arguments(self) -> dict:
        return self._arguments

    @property
    def namespace(self) -> Union[str, None]:
        return self._namespace

    def update_arguments(self, **kwargs):
        """
        Add/Update arguments of the process node.

        .. versionadded:: 0.10.1
        """
        # Rebind to a new dict (instead of updating in place).
        self._arguments = {**self._arguments, **kwargs}

    def _as_tuple(self):
        return (self._process_id, self._arguments, self._namespace)

    def __eq__(self, other):
        return isinstance(other, type(self)) and self._as_tuple() == other._as_tuple()

    def to_dict(self) -> dict:
        """
        Convert process graph to a nested dictionary structure.
        Uses deep copy style: nodes that are reused in the graph will be duplicated
        (each reference gets its own nested copy).

        :raises ValueError: when encountering a value of unsupported type
        """

        def _deep_copy(x):
            """PGNode aware deep copy helper"""
            if isinstance(x, PGNode):
                return dict_no_none(process_id=x.process_id, arguments=_deep_copy(x.arguments), namespace=x.namespace)
            if isinstance(x, Parameter):
                return {"from_parameter": x.name}
            elif isinstance(x, dict):
                return {str(k): _deep_copy(v) for k, v in x.items()}
            elif isinstance(x, (list, tuple)):
                return type(x)(_deep_copy(v) for v in x)
            elif isinstance(x, (str, int, float)) or x is None:
                return x
            else:
                raise ValueError(repr(x))

        return _deep_copy(self)

    def flat_graph(self) -> Dict[str, dict]:
        """Get the process graph in internal flat dict representation."""
        return GraphFlattener().flatten(node=self)

    @staticmethod
    def to_process_graph_argument(value: Union["PGNode", str, dict]) -> Union[dict, str]:
        """
        Normalize given argument properly to a "process_graph" argument
        to be used as reducer/subprocess for processes like
        ``reduce_dimension``, ``aggregate_spatial``, ``apply``, ``merge_cubes``, ``resample_cube_temporal``

        :param value: a :py:class:`PGNode`, a dict with a :py:class:`PGNode` under key "process_graph",
            or a string (which is passed through as-is)
        :return: ``{"process_graph": PGNode}`` dict (or the given string)
        :raises ValueError: for any other kind of value
        """
        if isinstance(value, str):
            # assume string with predefined reduce/apply process ("mean", "sum", ...)
            # TODO: is this case still used? It's invalid anyway for 1.0 openEO spec I think?
            return value
        elif isinstance(value, PGNode):
            return {"process_graph": value}
        elif isinstance(value, dict) and isinstance(value.get("process_graph"), PGNode):
            return value
        else:
            raise ValueError(value)

    @staticmethod
    def from_flat_graph(flat_graph: dict, parameters: Optional[dict] = None) -> PGNode:
        """Unflatten a given flat dict representation of a process graph and return result node."""
        return PGNodeGraphUnflattener.unflatten(flat_graph=flat_graph, parameters=parameters)

    def walk_nodes(self) -> Iterator[PGNode]:
        """Walk this node and all its parents"""
        # TODO: option to do deep walk (walk through child graphs too)?
        yield self

        def walk(x) -> Iterator[PGNode]:
            # Recurse through (nested) dicts/lists/tuples of arguments, looking for PGNode instances.
            if isinstance(x, PGNode):
                yield from x.walk_nodes()
            elif isinstance(x, dict):
                for v in x.values():
                    yield from walk(v)
            elif isinstance(x, (list, tuple)):
                for v in x:
                    yield from walk(v)

        yield from walk(self.arguments)
def as_flat_graph(x: Union[dict, FlatGraphableMixin, Path, List[FlatGraphableMixin], Any]) -> Dict[str, dict]:
    """
    Convert given object to an internal flat dict graph representation.

    :param x: a dict (returned as-is), a :py:class:`FlatGraphableMixin` object,
        a string or path (handed to ``load_json_resource``),
        or a list/tuple of :py:class:`FlatGraphableMixin` objects.
    :raises ValueError: if the given object is not of a supported type
    """
    # TODO: document or verify which process graph flavor this is:
    #       including `{"process": {"process_graph": {nodes}}` ("process graph with metadata")
    #       including `{"process_graph": {nodes}}` ("process graph")
    #       or just the raw process graph nodes?
    if isinstance(x, dict):
        # Assume given dict is already a flat graph representation
        return x
    elif isinstance(x, FlatGraphableMixin):
        return x.flat_graph()
    elif isinstance(x, (str, Path)):
        # Assume a JSON resource (raw JSON, path to local file, JSON url, ...)
        return load_json_resource(x)
    elif isinstance(x, (list, tuple)) and all(isinstance(i, FlatGraphableMixin) for i in x):
        return MultiLeafGraph(x).flat_graph()
    raise ValueError(x)


class ReduceNode(PGNode):
    """
    A process graph node for "reduce" processes (has a reducer sub-process-graph)
    """

    def __init__(
        self,
        data: _FromNodeMixin,
        reducer: Union[PGNode, str, dict],
        dimension: str,
        context=None,
        process_id="reduce_dimension",
        band_math_mode: bool = False,
    ):
        assert process_id in ("reduce_dimension", "reduce_dimension_binary")
        arguments = {
            "data": data,
            "reducer": self.to_process_graph_argument(reducer),
            "dimension": dimension,
        }
        # Only include "context" when explicitly given.
        if context is not None:
            arguments["context"] = context
        super().__init__(process_id=process_id, arguments=arguments)
        # TODO #123 is it (still) necessary to make "band" math a special case?
        self.band_math_mode = band_math_mode

    @property
    def dimension(self):
        return self.arguments["dimension"]

    def reducer_process_graph(self) -> PGNode:
        return self.arguments["reducer"]["process_graph"]

    def clone_with_new_reducer(self, reducer: PGNode) -> ReduceNode:
        """Copy/clone this reduce node: keep input reference, but use new reducer"""
        return ReduceNode(
            data=self.arguments["data"]["from_node"],
            reducer=reducer,
            dimension=self.arguments["dimension"],
            band_math_mode=self.band_math_mode,
            context=self.arguments.get("context"),
            # Keep the process id of the original node too
            # (otherwise a "reduce_dimension_binary" node would silently turn into "reduce_dimension").
            process_id=self.process_id,
        )


class FlatGraphNodeIdGenerator:
    """
    Helper class to generate unique node ids (e.g. autoincrement style)
    for processes in a flat process graph.
    """

    def __init__(self):
        # Per process id: number of node ids generated so far.
        self._counters = collections.defaultdict(int)

    def generate(self, process_id: str):
        """Generate new key for given process id."""
        self._counters[process_id] += 1
        return "{p}{c}".format(p=process_id.replace("_", ""), c=self._counters[process_id])


class GraphFlattener(ProcessGraphVisitor):
    """
    Visitor that consumes a nested :py:class:`PGNode` graph and builds its flat dict representation.
    """

    def __init__(self, node_id_generator: FlatGraphNodeIdGenerator = None, multi_input_mode: bool = False):
        super().__init__()
        self._node_id_generator = node_id_generator or FlatGraphNodeIdGenerator()
        # Id (in the flat graph) of the node that was handled last
        self._last_node_id = None
        self._flattened: Dict[str, dict] = {}
        # Stack of argument containers (dict per process, list per array) under construction
        self._argument_stack = []
        # Mapping of `id(node)` to flat graph node id, to handle reused nodes only once
        self._node_cache = {}
        self._multi_input_mode = multi_input_mode

    def flatten(self, node: PGNode) -> Dict[str, dict]:
        """Consume given nested process graph and return flat dict representation"""
        if self._flattened and not self._multi_input_mode:
            raise RuntimeError("Flattening multiple graphs, but not in multi-input mode")
        self.accept_node(node)
        assert len(self._argument_stack) == 0
        return self.flattened(set_result_flag=not self._multi_input_mode)

    def flattened(self, set_result_flag: bool = True) -> Dict[str, dict]:
        """
        Get (a deep copy of) the flat graph built so far.

        :param set_result_flag: whether to flag the last handled node with ``"result": True``
        """
        flat_graph = copy.deepcopy(self._flattened)
        if set_result_flag:
            # TODO #583 an "end" node is not necessarily a "result" node
            flat_graph[self._last_node_id]["result"] = True
        return flat_graph

    def accept_node(self, node: PGNode):
        # Process reused nodes only first time and remember node id.
        node_id = id(node)
        if node_id not in self._node_cache:
            super()._accept_process(process_id=node.process_id, arguments=node.arguments, namespace=node.namespace)
            self._node_cache[node_id] = self._last_node_id
        else:
            self._last_node_id = self._node_cache[node_id]

    def enterProcess(self, process_id: str, arguments: dict, namespace: Union[str, None]):
        # Start a fresh argument dict for this process.
        self._argument_stack.append({})

    def leaveProcess(self, process_id: str, arguments: dict, namespace: Union[str, None]):
        node_id = self._node_id_generator.generate(process_id)
        self._flattened[node_id] = dict_no_none(
            process_id=process_id,
            arguments=self._argument_stack.pop(),
            namespace=namespace,
        )
        self._last_node_id = node_id

    def _store_argument(self, argument_id: str, value):
        if isinstance(value, Parameter):
            value = {"from_parameter": value.name}
        self._argument_stack[-1][argument_id] = value

    def _store_array_element(self, value):
        if isinstance(value, Parameter):
            value = {"from_parameter": value.name}
        self._argument_stack[-1].append(value)

    def enterArray(self, argument_id: str):
        # Register the (still empty) array as argument and make it the current container.
        array = []
        self._store_argument(argument_id, array)
        self._argument_stack.append(array)

    def leaveArray(self, argument_id: str):
        self._argument_stack.pop()

    def arrayElementDone(self, value):
        self._store_array_element(self._flatten_argument(value))

    def constantArrayElement(self, value):
        self._store_array_element(self._flatten_argument(value))

    def _flatten_argument(self, value):
        """Convert an argument value to its flat graph form (node references, sub-graphs, parameters)."""
        if isinstance(value, dict):
            if "from_node" in value:
                value = {"from_node": self._last_node_id}
            elif "process_graph" in value:
                pg = value["process_graph"]
                if isinstance(pg, PGNode):
                    # Flatten sub-graph separately, but share the node id generator.
                    value = {"process_graph": GraphFlattener(node_id_generator=self._node_id_generator).flatten(pg)}
                elif isinstance(pg, dict):
                    # Assume it is already a valid flat graph representation of a subprocess
                    value = {"process_graph": pg}
                else:
                    raise ValueError(pg)
            else:
                value = {k: self._flatten_argument(v) for k, v in value.items()}
        elif isinstance(value, Parameter):
            value = {"from_parameter": value.name}
        return value

    def leaveArgument(self, argument_id: str, value):
        self._store_argument(argument_id, self._flatten_argument(value))

    def constantArgument(self, argument_id: str, value):
        self._store_argument(argument_id, value)


class PGNodeGraphUnflattener(ProcessGraphUnflattener):
    """
    Unflatten a flat process graph to a graph of :py:class:`PGNode` objects

    Parameter substitution can also be performed, but is optional:
    if ``parameters=None`` is given, no parameter substitution is done,
    if a dictionary (even an empty one) is given, every parameter encountered in the process
    graph must have an entry for substitution.
    """

    def __init__(self, flat_graph: dict, parameters: Optional[dict] = None):
        super().__init__(flat_graph=flat_graph)
        self._parameters = parameters

    def _process_node(self, node: dict) -> PGNode:
        return PGNode(
            process_id=node["process_id"],
            arguments=self._process_value(value=node["arguments"]),
            namespace=node.get("namespace"),
        )

    def _process_from_node(self, key: str, node: dict) -> PGNode:
        return self.get_node(key=key)

    def _process_from_parameter(self, name: str) -> Any:
        if self._parameters is None:
            # No substitution requested: defer to the parent class handling.
            return super()._process_from_parameter(name=name)
        if name not in self._parameters:
            raise ProcessGraphVisitException("No substitution value for parameter {p!r}.".format(p=name))
        return self._parameters[name]


class MultiLeafGraph(FlatGraphableMixin):
    """
    Container for process graphs with multiple leaf/result nodes.
    """

    __slots__ = ["_leaves"]

    def __init__(self, leaves: Iterable[FlatGraphableMixin]):
        self._leaves = list(leaves)

    def flat_graph(self) -> Dict[str, dict]:
        """
        Flatten all leaves into a single flat graph (sharing one flattener).

        :raises ValueError: when a leaf is of unsupported type
        """
        flattener = GraphFlattener(multi_input_mode=True)
        for leaf in self._leaves:
            if isinstance(leaf, PGNode):
                flattener.flatten(leaf)
            elif isinstance(leaf, _FromNodeMixin):
                flattener.flatten(leaf.from_node())
            else:
                raise ValueError(f"Unsupported type {type(leaf)}")

        return flattener.flattened(set_result_flag=True)
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/metadata.html b/_modules/openeo/metadata.html new file mode 100644 index 000000000..b77c2aac3 --- /dev/null +++ b/_modules/openeo/metadata.html @@ -0,0 +1,874 @@ + + + + + + + openeo.metadata — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.metadata

+from __future__ import annotations
+
+import functools
+import logging
+import warnings
+from typing import Any, Callable, Dict, List, NamedTuple, Optional, Set, Tuple, Union
+
+import pystac
+import pystac.extensions.datacube
+import pystac.extensions.eo
+import pystac.extensions.item_assets
+
+from openeo.internal.jupyter import render_component
+from openeo.util import Rfc3339, deep_get
+
+_log = logging.getLogger(__name__)
+
+
class MetadataException(Exception):
    """Base class for metadata related exceptions of this module."""
+
+
class DimensionAlreadyExistsException(MetadataException):
    """Metadata exception about a dimension (name) that already exists."""
+
+
+# TODO: make these dimension classes immutable data classes
class Dimension:
    """Base class for dimensions."""

    def __init__(self, type: str, name: str):
        """
        :param type: dimension type (e.g. "spatial", "temporal", "bands", ...)
        :param name: dimension name
        """
        self.type = type
        self.name = name

    def __repr__(self):
        fields = ", ".join(f"{k!s}={v!r}" for (k, v) in self.__dict__.items())
        return f"{self.__class__.__name__}({fields})"

    def __eq__(self, other):
        # Same class and same field values
        return self.__class__ == other.__class__ and self.__dict__ == other.__dict__

    def rename(self, name) -> Dimension:
        """Create new dimension with new name."""
        import copy

        # Shallow copy (instead of building a bare `Dimension`)
        # so that subclasses that don't override this method keep their class and fields.
        renamed = copy.copy(self)
        renamed.name = name
        return renamed

    def rename_labels(self, target, source) -> Dimension:
        """
        Rename labels, if the type of dimension allows it.

        :param target: List of target labels
        :param source: Source labels, or empty list
        :return: A new dimension with modified labels, or the same if no change is applied.
        """
        import copy

        # In general, we don't have/manage label info here, so do nothing:
        # return an unchanged (shallow) copy, which also preserves the class and fields
        # of subclasses that don't override this method.
        return copy.copy(self)
+
+
+
class SpatialDimension(Dimension):
    """Dimension of type "spatial", with extent, coordinate reference system and step info."""

    DEFAULT_CRS = 4326

    def __init__(
        self,
        name: str,
        extent: Union[Tuple[float, float], List[float]],
        crs: Union[str, int, dict] = DEFAULT_CRS,
        step=None,
    ):
        """
        :param name: name of the dimension
        :param extent: extent of the dimension (pair of values)
        :param crs: coordinate reference system (defaults to ``DEFAULT_CRS``)
        :param step: The space between the values. Use null for irregularly spaced steps.
        """
        super().__init__(type="spatial", name=name)
        self.extent = extent
        self.crs = crs
        self.step = step

    def rename(self, name) -> Dimension:
        """Create new spatial dimension with new name (and same extent, crs and step)."""
        return SpatialDimension(name=name, extent=self.extent, crs=self.crs, step=self.step)

    def rename_labels(self, target, source) -> Dimension:
        """
        Label renaming is not managed for spatial dimensions:
        return an unchanged copy that is still a spatial dimension
        (instead of falling back to a bare ``Dimension`` without extent/crs/step).
        """
        return SpatialDimension(name=self.name, extent=self.extent, crs=self.crs, step=self.step)
+
+ + + +
class TemporalDimension(Dimension):
    """Dimension of type "temporal", with an extent."""

    def __init__(self, name: str, extent: Union[Tuple[str, str], List[str]]):
        """
        :param name: name of the dimension
        :param extent: extent of the dimension (pair of strings)
        """
        super().__init__(type="temporal", name=name)
        self.extent = extent

    def rename(self, name) -> Dimension:
        """Create new temporal dimension with given name and the extent of this one."""
        renamed = TemporalDimension(name=name, extent=self.extent)
        return renamed

    def rename_labels(self, target, source) -> Dimension:
        """Create a new temporal dimension with same name and extent (given labels are not used)."""
        # TODO should we check if the extent has changed with the new labels?
        unchanged = TemporalDimension(name=self.name, extent=self.extent)
        return unchanged
+
class Band(NamedTuple):
    """
    Simple container class for band metadata.
    Based on https://github.com/stac-extensions/eo#band-object

    Only ``name`` is required, the other fields default to ``None``.
    """

    # Band name
    name: str
    # Common name of the band (optional)
    common_name: Optional[str] = None
    # Wavelength in micrometer
    wavelength_um: Optional[float] = None
    # Alternative names for the band (optional)
    aliases: Optional[List[str]] = None
    # "openeo:gsd" field (https://github.com/Open-EO/openeo-stac-extensions#GSD-Object)
    gsd: Optional[dict] = None
class BandDimension(Dimension):
    """Dimension of type "bands", holding a list of :py:class:`Band` metadata items."""

    # TODO #575 support unordered bands and avoid assumption that band order is known.
    def __init__(self, name: str, bands: List[Band]):
        super().__init__(type="bands", name=name)
        self.bands = bands

    @property
    def band_names(self) -> List[str]:
        """Names of the bands (in band order)."""
        return [item.name for item in self.bands]

    @property
    def band_aliases(self) -> List[List[str]]:
        """Aliases of each band (in band order)."""
        return [item.aliases for item in self.bands]

    @property
    def common_names(self) -> List[str]:
        """Common name of each band (in band order)."""
        return [item.common_name for item in self.bands]

    def band_index(self, band: Union[int, str]) -> int:
        """
        Resolve a given band (common) name/index to band index

        :param band: band name, common name or index
        :return int: band index
        :raises ValueError: when the band could not be resolved
        """
        names = self.band_names
        if isinstance(band, int) and 0 <= band < len(names):
            return band
        elif isinstance(band, str):
            # First try common names if possible
            common = self.common_names
            if band in common:
                return common.index(band)
            if band in names:
                return names.index(band)
            # Check band aliases to still support old band names
            for position, alias_list in enumerate(self.band_aliases):
                if alias_list and band in alias_list:
                    return position
        raise ValueError("Invalid band name/index {b!r}. Valid names: {n!r}".format(b=band, n=names))

    def band_name(self, band: Union[str, int], allow_common=True) -> str:
        """Resolve (common) name or index to a valid (common) name"""
        if isinstance(band, str):
            if band in self.band_names:
                return band
            if band in self.common_names:
                if allow_common:
                    return band
                return self.band_names[self.common_names.index(band)]
            alias_match = any(bool(alias_list and band in alias_list) for alias_list in self.band_aliases)
            if alias_match:
                return self.band_names[self.band_index(band)]
        elif isinstance(band, int) and 0 <= band < len(self.bands):
            return self.band_names[band]
        raise ValueError("Invalid band name/index {b!r}. Valid names: {n!r}".format(b=band, n=self.band_names))

    def filter_bands(self, bands: List[Union[int, str]]) -> BandDimension:
        """
        Construct new BandDimension with subset of bands,
        based on given band indices or (common) names
        """
        selection = [self.bands[self.band_index(b)] for b in bands]
        return BandDimension(name=self.name, bands=selection)

    def append_band(self, band: Band) -> BandDimension:
        """Create new BandDimension with appended band."""
        if band.name in self.band_names:
            raise ValueError("Duplicate band {b!r}".format(b=band))

        extended = self.bands + [band]
        return BandDimension(name=self.name, bands=extended)

    def rename_labels(self, target, source) -> Dimension:
        """
        Create new BandDimension with renamed band labels.

        Without ``source``, the bands are replaced with new bands named after ``target``.
        Otherwise, each band listed in ``source`` gets the corresponding name from ``target``
        (keeping its other band metadata).
        """
        if not source:
            new_bands = [Band(name=n) for n in target]
            return BandDimension(name=self.name, bands=new_bands)

        if len(target) != len(source):
            raise ValueError(
                "In rename_labels, `target` and `source` should have same number of labels, "
                "but got: `target` {t} and `source` {s}".format(t=target, s=source)
            )
        new_bands = self.bands.copy()
        for old_name, new_name in zip(source, target):
            position = self.band_index(old_name)
            current = new_bands[position]
            new_bands[position] = Band(
                name=new_name,
                common_name=current.common_name,
                wavelength_um=current.wavelength_um,
                aliases=current.aliases,
                gsd=current.gsd,
            )
        return BandDimension(name=self.name, bands=new_bands)

    def rename(self, name) -> Dimension:
        """Create new band dimension with given name and the bands of this one."""
        return BandDimension(name=name, bands=self.bands)
+
class CubeMetadata:
    """
    Interface for metadata of a data cube.

    Allows interaction with the cube dimensions and their labels (if available).
    """

    def __init__(self, dimensions: Optional[List[Dimension]] = None):
        """
        :param dimensions: list of dimensions of the cube (or ``None`` if there is no dimension info)
        :raises MetadataException: if a dimension of type "bands" or "temporal"
            is not an instance of the corresponding dimension class
        """
        # Original collection metadata (actual cube metadata might be altered through processes)
        self._dimensions = dimensions
        self._band_dimension = None
        self._temporal_dimension = None

        if dimensions is not None:
            for dim in self._dimensions:
                # TODO: here we blindly pick last bands or temporal dimension if multiple. Let user choose?
                # TODO: add spacial dimension handling?
                if dim.type == "bands":
                    if isinstance(dim, BandDimension):
                        self._band_dimension = dim
                    else:
                        raise MetadataException("Invalid band dimension {d!r}".format(d=dim))
                if dim.type == "temporal":
                    if isinstance(dim, TemporalDimension):
                        self._temporal_dimension = dim
                    else:
                        raise MetadataException("Invalid temporal dimension {d!r}".format(d=dim))

    def __eq__(self, o: Any) -> bool:
        return isinstance(o, type(self)) and self._dimensions == o._dimensions

    def _clone_and_update(self, dimensions: Optional[List[Dimension]] = None, **kwargs) -> CubeMetadata:
        """Create a new instance (of same class) with copied/updated fields."""
        cls = type(self)
        if dimensions is None:
            dimensions = self._dimensions
        return cls(dimensions=dimensions, **kwargs)

    def _dimension_list(self) -> List[Dimension]:
        """Dimensions as a list, treating "no dimension info" (``None``) as no dimensions."""
        return [] if self._dimensions is None else self._dimensions

    def dimension_names(self) -> List[str]:
        """List the names of the dimensions (empty if there are no dimensions)."""
        return [d.name for d in self._dimension_list()]

    def assert_valid_dimension(self, dimension: str) -> str:
        """
        Make sure given dimension name is valid.

        :return: the given dimension name
        :raises ValueError: if there is no dimension with that name
        """
        names = self.dimension_names()
        if dimension not in names:
            raise ValueError(f"Invalid dimension {dimension!r}. Should be one of {names}")
        return dimension

    def has_band_dimension(self) -> bool:
        return isinstance(self._band_dimension, BandDimension)

    @property
    def band_dimension(self) -> BandDimension:
        """Dimension corresponding to spectral/logic/thematic "bands"."""
        if not self.has_band_dimension():
            raise MetadataException("No band dimension")
        return self._band_dimension

    def has_temporal_dimension(self) -> bool:
        return isinstance(self._temporal_dimension, TemporalDimension)

    @property
    def temporal_dimension(self) -> TemporalDimension:
        """The temporal dimension (raises :py:class:`MetadataException` if there is none)."""
        if not self.has_temporal_dimension():
            raise MetadataException("No temporal dimension")
        return self._temporal_dimension

    @property
    def spatial_dimensions(self) -> List[SpatialDimension]:
        """All spatial dimensions (empty list if there are none)."""
        return [d for d in self._dimension_list() if isinstance(d, SpatialDimension)]

    @property
    def bands(self) -> List[Band]:
        """Get band metadata as list of Band metadata tuples"""
        return self.band_dimension.bands

    @property
    def band_names(self) -> List[str]:
        """Get band names of band dimension"""
        return self.band_dimension.band_names

    @property
    def band_common_names(self) -> List[str]:
        """Get common names of the bands of the band dimension"""
        return self.band_dimension.common_names

    def get_band_index(self, band: Union[int, str]) -> int:
        # TODO: eliminate this shortcut for smaller API surface
        return self.band_dimension.band_index(band)

    def filter_bands(self, band_names: List[Union[int, str]]) -> CubeMetadata:
        """
        Create new `CubeMetadata` with filtered band dimension

        :param band_names: list of band names/indices to keep
        :return: new metadata object
        :raises MetadataException: if there is no band dimension
        """
        # Explicit check instead of an `assert` (which would be skipped when running with `-O`).
        if not self.has_band_dimension():
            raise MetadataException("No band dimension")
        return self._clone_and_update(
            dimensions=[d.filter_bands(band_names) if isinstance(d, BandDimension) else d for d in self._dimensions]
        )

    def append_band(self, band: Band) -> CubeMetadata:
        """
        Create new `CubeMetadata` with given band added to band dimension.

        :raises MetadataException: if there is no band dimension
        """
        # Explicit check instead of an `assert` (which would be skipped when running with `-O`).
        if not self.has_band_dimension():
            raise MetadataException("No band dimension")
        return self._clone_and_update(
            dimensions=[d.append_band(band) if isinstance(d, BandDimension) else d for d in self._dimensions]
        )

    def rename_labels(self, dimension: str, target: list, source: list = None) -> CubeMetadata:
        """
        Renames the labels of the specified dimension from source to target.

        :param dimension: Dimension name
        :param target: The new names for the labels.
        :param source: The names of the labels as they are currently in the data cube.

        :return: Updated metadata
        """
        self.assert_valid_dimension(dimension)
        loc = self.dimension_names().index(dimension)
        new_dimensions = self._dimensions.copy()
        new_dimensions[loc] = new_dimensions[loc].rename_labels(target, source)

        return self._clone_and_update(dimensions=new_dimensions)

    def rename_dimension(self, source: str, target: str) -> CubeMetadata:
        """
        Rename source dimension into target, preserving other properties
        """
        self.assert_valid_dimension(source)
        loc = self.dimension_names().index(source)
        new_dimensions = self._dimensions.copy()
        new_dimensions[loc] = new_dimensions[loc].rename(target)

        return self._clone_and_update(dimensions=new_dimensions)

    def reduce_dimension(self, dimension_name: str) -> CubeMetadata:
        """Create new CubeMetadata object by collapsing/reducing a dimension."""
        # TODO: option to keep reduced dimension (with a single value)?
        # TODO: rename argument to `name` for more internal consistency
        # TODO: merge with drop_dimension (which does the same).
        self.assert_valid_dimension(dimension_name)
        loc = self.dimension_names().index(dimension_name)
        dimensions = self._dimensions[:loc] + self._dimensions[loc + 1 :]
        return self._clone_and_update(dimensions=dimensions)

    def reduce_spatial(self) -> CubeMetadata:
        """Create new CubeMetadata object by reducing the spatial dimensions."""
        dimensions = [d for d in self._dimension_list() if not isinstance(d, SpatialDimension)]
        return self._clone_and_update(dimensions=dimensions)

    def add_dimension(self, name: str, label: Union[str, float], type: str = None) -> CubeMetadata:
        """
        Create new CubeMetadata object with added dimension

        :param name: name of the new dimension
        :param label: label of the new dimension (used as band name or as single-value extent)
        :param type: dimension type ("bands", "spatial", "temporal"; anything else gives a generic dimension)
        :raises DimensionAlreadyExistsException: if there already is a dimension with given name
        """
        existing = self._dimension_list()
        if any(d.name == name for d in existing):
            raise DimensionAlreadyExistsException(f"Dimension with name {name!r} already exists")
        if type == "bands":
            dim = BandDimension(name=name, bands=[Band(name=label)])
        elif type == "spatial":
            dim = SpatialDimension(name=name, extent=[label, label])
        elif type == "temporal":
            dim = TemporalDimension(name=name, extent=[label, label])
        else:
            dim = Dimension(type=type or "other", name=name)
        return self._clone_and_update(dimensions=existing + [dim])

    def drop_dimension(self, name: str = None) -> CubeMetadata:
        """
        Create new CubeMetadata object without dropped dimension with given name

        :raises ValueError: if there is no dimension with given name
        """
        dimension_names = self.dimension_names()
        if name not in dimension_names:
            raise ValueError("No dimension named {n!r} (valid names: {ns!r})".format(n=name, ns=dimension_names))
        return self._clone_and_update(dimensions=[d for d in self._dimensions if not d.name == name])

    def __str__(self) -> str:
        bands = self.band_names if self.has_band_dimension() else "no bands dimension"
        return f"CubeMetadata({bands} - {self.dimension_names()})"
class CollectionMetadata(CubeMetadata):
    """
    Wrapper for EO Data Collection metadata.

    Simplifies getting values from deeply nested mappings,
    allows additional parsing and normalizing compatibility issues.

    Metadata is expected to follow format defined by
    https://openeo.org/documentation/1.0/developers/api/reference.html#operation/describe-collection
    (with partial support for older versions)

    """

    def __init__(self, metadata: dict, dimensions: List[Dimension] = None):
        """
        :param metadata: collection metadata dict
        :param dimensions: optional list of dimensions (parsed from ``metadata`` when not given)
        """
        self._orig_metadata = metadata
        if dimensions is None:
            dimensions = self._parse_dimensions(self._orig_metadata)

        super().__init__(dimensions=dimensions)

    @classmethod
    def _parse_dimensions(cls, spec: dict, complain: Callable[[str], None] = warnings.warn) -> List[Dimension]:
        """
        Extract data cube dimension metadata from STAC-like description of a collection.

        Dimension metadata comes from different places in spec:
        - 'cube:dimensions' has dimension names (e.g. 'x', 'y', 't'), dimension extent info
          and band names for band dimensions
        - 'eo:bands' has more detailed band information like "common" name and wavelength info

        This helper tries to normalize/combine these sources.

        :param spec: STAC like collection metadata dict
        :param complain: handler for warnings
        :return list: list of `Dimension` objects

        """

        # Dimension info is in `cube:dimensions` (or 0.4-style `properties/cube:dimensions`)
        cube_dimensions = deep_get(spec, "cube:dimensions", default=None)
        if not cube_dimensions:
            cube_dimensions = deep_get(spec, "properties", "cube:dimensions", default=None) or {}
        if not cube_dimensions:
            complain("No cube:dimensions metadata")

        dimensions = []
        for name, info in cube_dimensions.items():
            dim_type = info.get("type")
            if dim_type == "spatial":
                dimension = SpatialDimension(
                    name=name,
                    extent=info.get("extent"),
                    crs=info.get("reference_system", SpatialDimension.DEFAULT_CRS),
                    step=info.get("step", None),
                )
            elif dim_type == "temporal":
                dimension = TemporalDimension(name=name, extent=info.get("extent"))
            elif dim_type == "bands":
                bands = [Band(name=b) for b in info.get("values", [])]
                if not bands:
                    complain("No band names in dimension {d!r}".format(d=name))
                dimension = BandDimension(name=name, bands=bands)
            else:
                complain("Unknown dimension type {t!r}".format(t=dim_type))
                dimension = Dimension(name=name, type=dim_type)
            dimensions.append(dimension)

        # Detailed band information: `summaries/[eo|raster]:bands` (and 0.4 style `properties/eo:bands`)
        eo_bands = None
        for path in [("summaries", "eo:bands"), ("summaries", "raster:bands"), ("properties", "eo:bands")]:
            eo_bands = deep_get(spec, *path, default=None)
            if eo_bands:
                break

        if not eo_bands:
            return dimensions

        # center_wavelength is in micrometer according to spec
        bands_detailed = []
        for b in eo_bands:
            bands_detailed.append(
                Band(
                    name=b["name"],
                    common_name=b.get("common_name"),
                    wavelength_um=b.get("center_wavelength"),
                    aliases=b.get("aliases"),
                    gsd=b.get("openeo:gsd"),
                )
            )

        # Update band dimension with more detailed info
        band_dimensions = [d for d in dimensions if d.type == "bands"]
        if not band_dimensions:
            if not dimensions:
                complain("Assuming name 'bands' for anonymous band dimension.")
                dimensions.append(BandDimension(name="bands", bands=bands_detailed))
            else:
                complain("No 'bands' dimension in 'cube:dimensions' while having 'eo:bands' or 'raster:bands'")
        elif len(band_dimensions) == 1:
            (dim,) = band_dimensions
            # Update band values from 'cube:dimensions' with more detailed 'eo:bands' info
            eo_band_names = [b.name for b in bands_detailed]
            cube_dimension_band_names = [b.name for b in dim.bands]
            if eo_band_names == cube_dimension_band_names:
                dim.bands = bands_detailed
            else:
                complain("Band name mismatch: {a} != {b}".format(a=cube_dimension_band_names, b=eo_band_names))
        else:
            complain("Multiple dimensions of type 'bands'")

        return dimensions

    def _clone_and_update(
        self, metadata: dict = None, dimensions: List[Dimension] = None, **kwargs
    ) -> CollectionMetadata:
        """
        Create a new instance (of same class) with copied/updated fields.

        This overrides the method in `CubeMetadata` to keep the original metadata.
        """
        if metadata is None:
            metadata = self._orig_metadata
        if dimensions is None:
            dimensions = self._dimensions
        return type(self)(metadata=metadata, dimensions=dimensions, **kwargs)

    def get(self, *args, default=None):
        """Look up a (nested) value in the original metadata (through ``deep_get``)."""
        return deep_get(self._orig_metadata, *args, default=default)

    @property
    def extent(self) -> dict:
        """The "extent" field of the original metadata (``None`` if missing)."""
        # TODO: is this currently used and relevant?
        # TODO: check against extent metadata in dimensions
        return self._orig_metadata.get("extent")

    def _repr_html_(self):
        return render_component("collection", data=self._orig_metadata)

    def __str__(self) -> str:
        if self.has_band_dimension():
            bands = self.band_names
        else:
            bands = "no bands dimension"
        return f"CollectionMetadata({self.extent} - {bands} - {self.dimension_names()})"
def metadata_from_stac(url: str) -> CubeMetadata:
    """
    Read the band metadata from a static STAC catalog or a STAC API Collection
    and return it as a :py:class:`CubeMetadata`.

    :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection
    :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url.
    :raises ValueError: when the resource at ``url`` is not a STAC Item, Collection or Catalog
    """

    # TODO move these nested functions and other logic to _StacMetadataParser

    def get_band_metadata(eo_bands_location: dict) -> List[Band]:
        # TODO: return None iso empty list when no metadata?
        found = []
        for entry in eo_bands_location.get("eo:bands", []):
            found.append(
                Band(
                    name=entry["name"],
                    common_name=entry.get("common_name"),
                    wavelength_um=entry.get("center_wavelength"),
                )
            )
        return found

    stac_object = pystac.read_file(href=url)

    if isinstance(stac_object, pystac.Item):
        if "eo:bands" in stac_object.properties:
            bands = get_band_metadata(stac_object.properties)
        elif stac_object.get_collection() is not None:
            # TODO: Also do asset based band detection (like below)?
            bands = get_band_metadata(stac_object.get_collection().summaries.lists)
        else:
            bands = get_band_metadata({})

    elif isinstance(stac_object, pystac.Collection):
        bands = get_band_metadata(stac_object.summaries.lists)

        # Summaries is not a required field in a STAC collection, so also check the assets
        for member in stac_object.get_items():
            # Only assets that carry "eo:bands" metadata are of interest.
            eo_assets = [
                candidate for candidate in member.get_assets().values() if "eo:bands" in candidate.extra_fields
            ]
            for eo_asset in eo_assets:
                for extra_band in get_band_metadata(eo_asset.extra_fields):
                    # Only add bands whose name was not collected yet.
                    already_known = [known.name for known in bands]
                    if extra_band.name not in already_known:
                        bands.append(extra_band)
        if _PYSTAC_1_9_EXTENSION_INTERFACE and stac_object.ext.has("item_assets"):
            # TODO #575 support unordered band names and avoid conversion to a list.
            bands = list(_StacMetadataParser().get_bands_from_item_assets(stac_object.ext.item_assets))

    elif isinstance(stac_object, pystac.Catalog):
        bands = get_band_metadata(stac_object.extra_fields.get("summaries", {}))
    else:
        raise ValueError(stac_object)

    # TODO: conditionally include band dimension when there was actual indication of band metadata?
    dimensions = [BandDimension(name="bands", bands=bands)]

    # TODO: is it possible to derive the actual name of temporal dimension that the backend will use?
    temporal_dimension = _StacMetadataParser().get_temporal_dimension(stac_object)
    if temporal_dimension:
        dimensions.append(temporal_dimension)

    return CubeMetadata(dimensions=dimensions)


# Sniff for PySTAC extension API since version 1.9.0 (which is not available below Python 3.9)
# TODO: remove this once support for Python 3.7 and 3.8 is dropped
_PYSTAC_1_9_EXTENSION_INTERFACE = hasattr(pystac.Item, "ext")


class _StacMetadataParser:
    """
    Helper to extract openEO metadata from STAC metadata resource
    """

    def __init__(self):
        # TODO: toggles for how to handle strictness, warnings, logging, etc
        pass

    def _get_band_from_eo_bands_item(self, eo_band: Union[dict, pystac.extensions.eo.Band]) -> Band:
        """
        Convert a single `eo:bands` entry to a `Band`.

        :param eo_band: band entry, as `pystac.extensions.eo.Band` instance or as dict with a "name" key
        :raises ValueError: when the entry is neither of the supported forms
        """
        if isinstance(eo_band, pystac.extensions.eo.Band):
            name = eo_band.name
            common_name = eo_band.common_name
            wavelength = eo_band.center_wavelength
        elif isinstance(eo_band, dict) and "name" in eo_band:
            name = eo_band["name"]
            common_name = eo_band.get("common_name")
            wavelength = eo_band.get("center_wavelength")
        else:
            raise ValueError(eo_band)
        return Band(name=name, common_name=common_name, wavelength_um=wavelength)

    def get_bands_from_eo_bands(self, eo_bands: List[Union[dict, pystac.extensions.eo.Band]]) -> List[Band]:
        """
        Extract bands from STAC `eo:bands` array

        :param eo_bands: List of band objects, as dict or `pystac.extensions.eo.Band` instances
        """
        # TODO: option to skip bands that failed to parse in some way?
        converted = []
        for entry in eo_bands:
            converted.append(self._get_band_from_eo_bands_item(entry))
        return converted

    def _get_bands_from_item_asset(
        self, item_asset: pystac.extensions.item_assets.AssetDefinition, *, _warn: Callable[[str], None] = _log.warning
    ) -> Union[List[Band], None]:
        """
        Get bands from a STAC 'item_assets' asset definition.

        Returns ``None`` when the asset definition provides no band listing.
        """
        if _PYSTAC_1_9_EXTENSION_INTERFACE and item_asset.ext.has("eo"):
            # "eo" extension is declared: go through the extension interface.
            declared = item_asset.ext.eo.bands
            if declared is None:
                return None
            return self.get_bands_from_eo_bands(declared)
        if "eo:bands" not in item_asset.properties:
            return None
        # TODO: skip this in strict mode?
        if _PYSTAC_1_9_EXTENSION_INTERFACE:
            _warn("Extracting band info from 'eo:bands' metadata, but 'eo' STAC extension was not declared.")
        return self.get_bands_from_eo_bands(item_asset.properties["eo:bands"])

    def get_bands_from_item_assets(
        self, item_assets: Dict[str, pystac.extensions.item_assets.AssetDefinition]
    ) -> Set[Band]:
        """
        Collect the bands listed in "item_assets" objects (defined by "item-assets" extension,
        in combination with "eo" extension) at STAC Collection top-level.

        "item_assets" in STAC is a mapping, so the band order is undefined,
        which is why a set of bands is returned here.

        :param item_assets: a STAC `item_assets` mapping
        """
        # Trick to just warn once per collection
        warn_once = functools.lru_cache()(_log.warning)
        collected = set()
        for definition in item_assets.values():
            # Assets without band listing yield None/empty: nothing to add then.
            collected.update(self._get_bands_from_item_asset(definition, _warn=warn_once) or ())
        return collected

    def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalDimension, None]:
        """
        Extract the temporal dimension from a STAC Collection/Item (if any)

        Returns ``None`` when no (unambiguous) temporal dimension could be determined.
        """
        # TODO: also extract temporal dimension from assets?
        if _PYSTAC_1_9_EXTENSION_INTERFACE:
            if stac_obj.ext.has("cube") and hasattr(stac_obj.ext, "cube"):
                candidates = [
                    (dim_name, dim.extent or [None, None])
                    for dim_name, dim in stac_obj.ext.cube.dimensions.items()
                    if dim.dim_type == pystac.extensions.datacube.DimensionType.TEMPORAL
                ]
            elif isinstance(stac_obj, pystac.Collection) and stac_obj.extent.temporal:
                # No explicit "cube:dimensions": build fallback from "extent.temporal",
                # with dimension name "t" (openEO API recommendation).
                first_interval = stac_obj.extent.temporal.intervals[0]
                return TemporalDimension(
                    name="t",
                    extent=[Rfc3339(propagate_none=True).normalize(bound) for bound in first_interval],
                )
            else:
                return None
        else:
            # Without the extension interface: read the raw "cube:dimensions" mapping.
            if isinstance(stac_obj, pystac.Item):
                raw_dimensions = stac_obj.properties.get("cube:dimensions", {})
            elif isinstance(stac_obj, pystac.Collection):
                raw_dimensions = stac_obj.extra_fields.get("cube:dimensions", {})
            else:
                raw_dimensions = {}
            candidates = [
                (dim_name, dim.get("extent", [None, None]))
                for dim_name, dim in raw_dimensions.items()
                if dim.get("type") == "temporal"
            ]

        # Only an unambiguous (single) temporal dimension is reported.
        if len(candidates) != 1:
            return None
        dim_name, dim_extent = candidates[0]
        return TemporalDimension(name=dim_name, extent=dim_extent)
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/processes.html b/_modules/openeo/processes.html new file mode 100644 index 000000000..92f1e0310 --- /dev/null +++ b/_modules/openeo/processes.html @@ -0,0 +1,6173 @@ + + + + + + + openeo.processes — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.processes

+
+# Do not edit this file directly.
+# It is automatically generated.
+# Used command line arguments:
+#    openeo/internal/processes/generator.py specs/openeo-processes specs/openeo-processes/proposals specs/openeo-processes-legacy --output openeo/processes.py
+# Generated on 2024-01-09
+
+from __future__ import annotations
+
+import builtins
+
+from openeo.internal.documentation import openeo_process
+from openeo.internal.processes.builder import UNSET, ProcessBuilderBase
+from openeo.rest._datacube import build_child_callback
+
+
+
+[docs] +class ProcessBuilder(ProcessBuilderBase): + """ + .. include:: api-processbuilder.rst + """ + + _ITERATION_LIMIT = 100 + + @openeo_process(process_id="add", mode="operator") + def __add__(self, other) -> ProcessBuilder: + return self.add(other) + + @openeo_process(process_id="add", mode="operator") + def __radd__(self, other) -> ProcessBuilder: + return add(other, self) + + @openeo_process(process_id="subtract", mode="operator") + def __sub__(self, other) -> ProcessBuilder: + return self.subtract(other) + + @openeo_process(process_id="subtract", mode="operator") + def __rsub__(self, other) -> ProcessBuilder: + return subtract(other, self) + + @openeo_process(process_id="multiply", mode="operator") + def __mul__(self, other) -> ProcessBuilder: + return self.multiply(other) + + @openeo_process(process_id="multiply", mode="operator") + def __rmul__(self, other) -> ProcessBuilder: + return multiply(other, self) + + @openeo_process(process_id="divide", mode="operator") + def __truediv__(self, other) -> ProcessBuilder: + return self.divide(other) + + @openeo_process(process_id="divide", mode="operator") + def __rtruediv__(self, other) -> ProcessBuilder: + return divide(other, self) + + @openeo_process(process_id="multiply", mode="operator") + def __neg__(self) -> ProcessBuilder: + return self.multiply(-1) + + @openeo_process(process_id="power", mode="operator") + def __pow__(self, other) -> ProcessBuilder: + return self.power(other) + + @openeo_process(process_id="array_element", mode="operator") + def __getitem__(self, key) -> ProcessBuilder: + if isinstance(key, builtins.int): + if key > self._ITERATION_LIMIT: + raise RuntimeError( + "Exceeded ProcessBuilder iteration limit. " + "Are you mistakenly using a Python builtin like `sum()` or `all()` in a callback " + "instead of the appropriate helpers from the `openeo.processes` module?" 
+ ) + return self.array_element(index=key) + else: + return self.array_element(label=key) + + @openeo_process(process_id="eq", mode="operator") + def __eq__(self, other) -> ProcessBuilder: + return eq(self, other) + + @openeo_process(process_id="neq", mode="operator") + def __ne__(self, other) -> ProcessBuilder: + return neq(self, other) + + @openeo_process(process_id="lt", mode="operator") + def __lt__(self, other) -> ProcessBuilder: + return lt(self, other) + + @openeo_process(process_id="lte", mode="operator") + def __le__(self, other) -> ProcessBuilder: + return lte(self, other) + + @openeo_process(process_id="ge", mode="operator") + def __ge__(self, other) -> ProcessBuilder: + return gte(self, other) + + @openeo_process(process_id="gt", mode="operator") + def __gt__(self, other) -> ProcessBuilder: + return gt(self, other) + + @openeo_process + def absolute(self) -> ProcessBuilder: + """ + Absolute value + + :param self: A number. + + :return: The computed absolute value. + """ + return absolute(x=self) + + @openeo_process + def add(self, y) -> ProcessBuilder: + """ + Addition of two numbers + + :param self: The first summand. + :param y: The second summand. + + :return: The computed sum of the two numbers. + """ + return add(x=self, y=y) + + @openeo_process + def add_dimension(self, name, label, type=UNSET) -> ProcessBuilder: + """ + Add a new dimension + + :param self: A data cube to add the dimension to. + :param name: Name for the dimension. + :param label: A dimension label. + :param type: The type of dimension, defaults to `other`. + + :return: The data cube with a newly added dimension. The new dimension has exactly one dimension label. + All other dimensions remain unchanged. 
+ """ + return add_dimension(data=self, name=name, label=label, type=type) + + @openeo_process + def aggregate_spatial(self, geometries, reducer, target_dimension=UNSET, context=UNSET) -> ProcessBuilder: + """ + Zonal statistics for geometries + + :param self: A raster data cube with at least two spatial dimensions. The data cube implicitly gets + restricted to the bounds of the geometries as if ``filter_spatial()`` would have been used with the + same values for the corresponding parameters immediately before this process. + :param geometries: Geometries for which the aggregation will be computed. Feature properties are + preserved for vector data cubes and all GeoJSON Features. One value will be computed per label in the + dimension of type `geometries`, GeoJSON `Feature` or `Geometry`. For a `FeatureCollection` multiple + values will be computed, one value per contained `Feature`. No values will be computed for empty + geometries. For example, a single value will be computed for a `MultiPolygon`, but two values will be + computed for a `FeatureCollection` containing two polygons. - For **polygons**, the process considers + all pixels for which the point at the pixel center intersects with the corresponding polygon (as + defined in the Simple Features standard by the OGC). - For **points**, the process considers the + closest pixel center. - For **lines** (line strings), the process considers all the pixels whose + centers are closest to at least one point on the line. Thus, pixels may be part of multiple geometries + and be part of multiple aggregations. No operation is applied to geometries that are outside of the + bounds of the data. + :param reducer: A reducer to be applied on all values of each geometry. A reducer is a single process + such as ``mean()`` or a set of processes, which computes a single value for a list of values, see the + category 'reducer' for such processes. 
+ :param target_dimension: By default (which is `null`), the process only computes the results and + doesn't add a new dimension. If this parameter contains a new dimension name, the computation also + stores information about the total count of pixels (valid + invalid pixels) and the number of valid + pixels (see ``is_valid()``) for each computed value. These values are added as a new dimension. The new + dimension of type `other` has the dimension labels `value`, `total_count` and `valid_count`. Fails + with a `TargetDimensionExists` exception if a dimension with the specified name exists. + :param context: Additional data to be passed to the reducer. + + :return: A vector data cube with the computed results. Empty geometries still exist but without any + aggregated values (i.e. no-data). The spatial dimensions are replaced by a dimension of type + 'geometries' and if `target_dimension` is not `null`, a new dimension is added. + """ + return aggregate_spatial( + data=self, + geometries=geometries, + reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), + target_dimension=target_dimension, + context=context + ) + + @openeo_process + def aggregate_spatial_window(self, reducer, size, boundary=UNSET, align=UNSET, context=UNSET) -> ProcessBuilder: + """ + Zonal statistics for rectangular windows + + :param self: A raster data cube with exactly two horizontal spatial dimensions and an arbitrary number + of additional dimensions. The process is applied to all additional dimensions individually. + :param reducer: A reducer to be applied on the list of values, which contain all pixels covered by the + window. A reducer is a single process such as ``mean()`` or a set of processes, which computes a single + value for a list of values, see the category 'reducer' for such processes. + :param size: Window size in pixels along the horizontal spatial dimensions. The first value + corresponds to the `x` axis, the second value corresponds to the `y` axis. 
+ :param boundary: Behavior to apply if the number of values for the axes `x` and `y` is not a multiple + of the corresponding value in the `size` parameter. Options are: - `pad` (default): pad the data cube + with the no-data value `null` to fit the required window size. - `trim`: trim the data cube to fit the + required window size. Set the parameter `align` to specifies to which corner the data is aligned to. + :param align: If the data requires padding or trimming (see parameter `boundary`), specifies to which + corner of the spatial extent the data is aligned to. For example, if the data is aligned to the upper + left, the process pads/trims at the lower-right. + :param context: Additional data to be passed to the reducer. + + :return: A raster data cube with the newly computed values and the same dimensions. The resolution + will change depending on the chosen values for the `size` and `boundary` parameter. It usually + decreases for the dimensions which have the corresponding parameter `size` set to values greater than + 1. The dimension labels will be set to the coordinate at the center of the window. The other dimension + properties (name, type and reference system) remain unchanged. + """ + return aggregate_spatial_window( + data=self, + reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), + size=size, + boundary=boundary, + align=align, + context=context + ) + + @openeo_process + def aggregate_temporal(self, intervals, reducer, labels=UNSET, dimension=UNSET, context=UNSET) -> ProcessBuilder: + """ + Temporal aggregations + + :param self: A data cube. + :param intervals: Left-closed temporal intervals, which are allowed to overlap. Each temporal interval + in the array has exactly two elements: 1. The first element is the start of the temporal interval. The + specified time instant is **included** in the interval. 2. The second element is the end of the + temporal interval. 
The specified time instant is **excluded** from the interval. The second element + must always be greater/later than the first element, except when using time without date. Otherwise, a + `TemporalExtentEmpty` exception is thrown. + :param reducer: A reducer to be applied for the values contained in each interval. A reducer is a + single process such as ``mean()`` or a set of processes, which computes a single value for a list of + values, see the category 'reducer' for such processes. Intervals may not contain any values, which for + most reducers leads to no-data (`null`) values by default. + :param labels: Distinct labels for the intervals, which can contain dates and/or times. Is only + required to be specified if the values for the start of the temporal intervals are not distinct and + thus the default labels would not be unique. The number of labels and the number of groups need to be + equal. + :param dimension: The name of the temporal dimension for aggregation. All data along the dimension is + passed through the specified reducer. If the dimension is not set or set to `null`, the data cube is + expected to only have one temporal dimension. Fails with a `TooManyDimensions` exception if it has more + dimensions. Fails with a `DimensionNotAvailable` exception if the specified dimension does not exist. + :param context: Additional data to be passed to the reducer. + + :return: A new data cube with the same dimensions. The dimension properties (name, type, labels, + reference system and resolution) remain unchanged, except for the resolution and dimension labels of + the given temporal dimension. 
+ """ + return aggregate_temporal( + data=self, + intervals=intervals, + reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), + labels=labels, + dimension=dimension, + context=context + ) + + @openeo_process + def aggregate_temporal_period(self, period, reducer, dimension=UNSET, context=UNSET) -> ProcessBuilder: + """ + Temporal aggregations based on calendar hierarchies + + :param self: The source data cube. + :param period: The time intervals to aggregate. The following pre-defined values are available: * + `hour`: Hour of the day * `day`: Day of the year * `week`: Week of the year * `dekad`: Ten day periods, + counted per year with three periods per month (day 1 - 10, 11 - 20 and 21 - end of month). The third + dekad of the month can range from 8 to 11 days. For example, the third dekad of a year spans from + January 21 till January 31 (11 days), the fourth dekad spans from February 1 till February 10 (10 days) + and the sixth dekad spans from February 21 till February 28 or February 29 in a leap year (8 or 9 days + respectively). * `month`: Month of the year * `season`: Three month periods of the calendar seasons + (December - February, March - May, June - August, September - November). * `tropical-season`: Six month + periods of the tropical seasons (November - April, May - October). * `year`: Proleptic years * + `decade`: Ten year periods ([0-to-9 decade](https://en.wikipedia.org/wiki/Decade#0-to-9_decade)), from + a year ending in a 0 to the next year ending in a 9. * `decade-ad`: Ten year periods ([1-to-0 + decade](https://en.wikipedia.org/wiki/Decade#1-to-0_decade)) better aligned with the anno Domini (AD) + calendar era, from a year ending in a 1 to the next year ending in a 0. + :param reducer: A reducer to be applied for the values contained in each period. A reducer is a single + process such as ``mean()`` or a set of processes, which computes a single value for a list of values, + see the category 'reducer' for such processes. 
Periods may not contain any values, which for most + reducers leads to no-data (`null`) values by default. + :param dimension: The name of the temporal dimension for aggregation. All data along the dimension is + passed through the specified reducer. If the dimension is not set or set to `null`, the source data + cube is expected to only have one temporal dimension. Fails with a `TooManyDimensions` exception if it + has more dimensions. Fails with a `DimensionNotAvailable` exception if the specified dimension does not + exist. + :param context: Additional data to be passed to the reducer. + + :return: A new data cube with the same dimensions. The dimension properties (name, type, labels, + reference system and resolution) remain unchanged, except for the resolution and dimension labels of + the given temporal dimension. The specified temporal dimension has the following dimension labels + (`YYYY` = four-digit year, `MM` = two-digit month, `DD` two-digit day of month): * `hour`: `YYYY-MM- + DD-00` - `YYYY-MM-DD-23` * `day`: `YYYY-001` - `YYYY-365` * `week`: `YYYY-01` - `YYYY-52` * `dekad`: + `YYYY-00` - `YYYY-36` * `month`: `YYYY-01` - `YYYY-12` * `season`: `YYYY-djf` (December - February), + `YYYY-mam` (March - May), `YYYY-jja` (June - August), `YYYY-son` (September - November). * `tropical- + season`: `YYYY-ndjfma` (November - April), `YYYY-mjjaso` (May - October). * `year`: `YYYY` * `decade`: + `YYY0` * `decade-ad`: `YYY1` The dimension labels in the new data cube are complete for the whole + extent of the source data cube. For example, if `period` is set to `day` and the source data cube has + two dimension labels at the beginning of the year (`2020-01-01`) and the end of a year (`2020-12-31`), + the process returns a data cube with 365 dimension labels (`2020-001`, `2020-002`, ..., `2020-365`). 
In + contrast, if `period` is set to `day` and the source data cube has just one dimension label + `2020-01-05`, the process returns a data cube with just a single dimension label (`2020-005`). + """ + return aggregate_temporal_period( + data=self, + period=period, + reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), + dimension=dimension, + context=context + ) + + @openeo_process + def all(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Are all of the values true? + + :param self: A set of boolean values. + :param ignore_nodata: Indicates whether no-data values are ignored or not and ignores them by default. + + :return: Boolean result of the logical operation. + """ + return all(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def and_(self, y) -> ProcessBuilder: + """ + Logical AND + + :param self: A boolean value. + :param y: A boolean value. + + :return: Boolean result of the logical AND. + """ + return and_(x=self, y=y) + + @openeo_process + def anomaly(self, normals, period) -> ProcessBuilder: + """ + Compute anomalies + + :param self: A data cube with exactly one temporal dimension and the following dimension labels for the + given period (`YYYY` = four-digit year, `MM` = two-digit month, `DD` two-digit day of month): * + `hour`: `YYYY-MM-DD-00` - `YYYY-MM-DD-23` * `day`: `YYYY-001` - `YYYY-365` * `week`: `YYYY-01` - + `YYYY-52` * `dekad`: `YYYY-00` - `YYYY-36` * `month`: `YYYY-01` - `YYYY-12` * `season`: `YYYY-djf` + (December - February), `YYYY-mam` (March - May), `YYYY-jja` (June - August), `YYYY-son` (September - + November). * `tropical-season`: `YYYY-ndjfma` (November - April), `YYYY-mjjaso` (May - October). * + `year`: `YYYY` * `decade`: `YYY0` * `decade-ad`: `YYY1` * `single-period` / `climatology-period`: Any + ``aggregate_temporal_period()`` can compute such a data cube. + :param normals: A data cube with normals, e.g. 
daily, monthly or yearly values computed from a process + such as ``climatological_normal()``. Must contain exactly one temporal dimension with the following + dimension labels for the given period: * `hour`: `00` - `23` * `day`: `001` - `365` * `week`: `01` - + `52` * `dekad`: `00` - `36` * `month`: `01` - `12` * `season`: `djf` (December - February), `mam` + (March - May), `jja` (June - August), `son` (September - November) * `tropical-season`: `ndjfma` + (November - April), `mjjaso` (May - October) * `year`: Four-digit year numbers * `decade`: Four-digit + year numbers, the last digit being a `0` * `decade-ad`: Four-digit year numbers, the last digit being a + `1` * `single-period` / `climatology-period`: A single dimension label with any name is expected. + :param period: Specifies the time intervals available in the normals data cube. The following options + are available: * `hour`: Hour of the day * `day`: Day of the year * `week`: Week of the year * + `dekad`: Ten day periods, counted per year with three periods per month (day 1 - 10, 11 - 20 and 21 - + end of month). The third dekad of the month can range from 8 to 11 days. For example, the fourth dekad + is Feb, 1 - Feb, 10 each year. * `month`: Month of the year * `season`: Three month periods of the + calendar seasons (December - February, March - May, June - August, September - November). * `tropical- + season`: Six month periods of the tropical seasons (November - April, May - October). * `year`: + Proleptic years * `decade`: Ten year periods ([0-to-9 + decade](https://en.wikipedia.org/wiki/Decade#0-to-9_decade)), from a year ending in a 0 to the next + year ending in a 9. * `decade-ad`: Ten year periods ([1-to-0 + decade](https://en.wikipedia.org/wiki/Decade#1-to-0_decade)) better aligned with the anno Domini (AD) + calendar era, from a year ending in a 1 to the next year ending in a 0. 
* `single-period` / + `climatology-period`: A single period of arbitrary length + + :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference + system and resolution) remain unchanged. + """ + return anomaly(data=self, normals=normals, period=period) + + @openeo_process + def any(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Is at least one value true? + + :param self: A set of boolean values. + :param ignore_nodata: Indicates whether no-data values are ignored or not and ignores them by default. + + :return: Boolean result of the logical operation. + """ + return any(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def apply(self, process, context=UNSET) -> ProcessBuilder: + """ + Apply a process to each value + + :param self: A data cube. + :param process: A process that accepts and returns a single value and is applied on each individual + value in the data cube. The process may consist of multiple sub-processes and could, for example, + consist of processes such as ``absolute()`` or ``linear_scale_range()``. + :param context: Additional data to be passed to the process. + + :return: A data cube with the newly computed values and the same dimensions. The dimension properties + (name, type, labels, reference system and resolution) remain unchanged. + """ + return apply(data=self, process=build_child_callback(process, parent_parameters=['x', 'context']), context=context) + + @openeo_process + def apply_dimension(self, process, dimension, target_dimension=UNSET, context=UNSET) -> ProcessBuilder: + """ + Apply a process to all values along a dimension + + :param self: A data cube. + :param process: Process to be applied on all values along the given dimension. The specified process + needs to accept an array and must return an array with at least one element. A process may consist of + multiple sub-processes. + :param dimension: The name of the source dimension to apply the process on. 
Fails with a + `DimensionNotAvailable` exception if the specified dimension does not exist. + :param target_dimension: The name of the target dimension or `null` (the default) to use the source + dimension specified in the parameter `dimension`. By specifying a target dimension, the source + dimension is removed. The target dimension with the specified name and the type `other` (see + ``add_dimension()``) is created, if it doesn't exist yet. + :param context: Additional data to be passed to the process. + + :return: A data cube with the newly computed values. All dimensions stay the same, except for the + dimensions specified in corresponding parameters. There are three cases how the dimensions can change: + 1. The source dimension is the target dimension: - The (number of) dimensions remain unchanged as + the source dimension is the target dimension. - The source dimension properties name and type remain + unchanged. - The dimension labels, the reference system and the resolution are preserved only if the + number of values in the source dimension is equal to the number of values computed by the process. + Otherwise, all other dimension properties change as defined in the list below. 2. The source dimension + is not the target dimension. The target dimension exists with a single label only: - The number of + dimensions decreases by one as the source dimension is 'dropped' and the target dimension is filled + with the processed data that originates from the source dimension. - The target dimension properties + name and type remain unchanged. All other dimension properties change as defined in the list below. 3. + The source dimension is not the target dimension and the latter does not exist: - The number of + dimensions remain unchanged, but the source dimension is replaced with the target dimension. - The + target dimension has the specified name and the type other. All other dimension properties are set as + defined in the list below. 
Unless otherwise stated above, for the given (target) dimension the + following applies: - the number of dimension labels is equal to the number of values computed by the + process, - the dimension labels are incrementing integers starting from zero, - the resolution changes, + and - the reference system is undefined. + """ + return apply_dimension( + data=self, + process=build_child_callback(process, parent_parameters=['data', 'context']), + dimension=dimension, + target_dimension=target_dimension, + context=context + ) + + @openeo_process + def apply_kernel(self, kernel, factor=UNSET, border=UNSET, replace_invalid=UNSET) -> ProcessBuilder: + """ + Apply a spatial convolution with a kernel + + :param self: A raster data cube. + :param kernel: Kernel as a two-dimensional array of weights. The inner level of the nested array aligns + with the `x` axis and the outer level aligns with the `y` axis. Each level of the kernel must have an + uneven number of elements, otherwise the process throws a `KernelDimensionsUneven` exception. + :param factor: A factor that is multiplied to each value after the kernel has been applied. This is + basically a shortcut for explicitly multiplying each value by a factor afterwards, which is often + required for some kernel-based algorithms such as the Gaussian blur. + :param border: Determines how the data is extended when the kernel overlaps with the borders. Defaults + to fill the border with zeroes. 
The following options are available: * *numeric value* - fill with a + user-defined constant number `n`: `nnnnnn|abcdefgh|nnnnnn` (default, with `n` = 0) * `replicate` - + repeat the value from the pixel at the border: `aaaaaa|abcdefgh|hhhhhh` * `reflect` - mirror/reflect + from the border: `fedcba|abcdefgh|hgfedc` * `reflect_pixel` - mirror/reflect from the center of the + pixel at the border: `gfedcb|abcdefgh|gfedcb` * `wrap` - repeat/wrap the image: + `cdefgh|abcdefgh|abcdef` + :param replace_invalid: This parameter specifies the value to replace non-numerical or infinite + numerical values with. By default, those values are replaced with zeroes. + + :return: A data cube with the newly computed values and the same dimensions. The dimension properties + (name, type, labels, reference system and resolution) remain unchanged. + """ + return apply_kernel(data=self, kernel=kernel, factor=factor, border=border, replace_invalid=replace_invalid) + + @openeo_process + def apply_neighborhood(self, process, size, overlap=UNSET, context=UNSET) -> ProcessBuilder: + """ + Apply a process to pixels in a n-dimensional neighborhood + + :param self: A raster data cube. + :param process: Process to be applied on all neighborhoods. + :param size: Neighborhood sizes along each dimension. This object maps dimension names to either a + physical measure (e.g. 100 m, 10 days) or pixels (e.g. 32 pixels). For dimensions not specified, the + default is to provide all values. Be aware that including all values from overly large dimensions may + not be processed at once. + :param overlap: Overlap of neighborhoods along each dimension to avoid border effects. By default no + overlap is provided. For instance a temporal dimension can add 1 month before and after a + neighborhood. In the spatial dimensions, this is often a number of pixels. The overlap specified is + added before and after, so an overlap of 8 pixels will add 8 pixels on both sides of the window, so 16 + in total. 
Be aware that large overlaps increase the need for computational resources and modifying + overlapping data in subsequent operations have no effect. + :param context: Additional data to be passed to the process. + + :return: A raster data cube with the newly computed values and the same dimensions. The dimension + properties (name, type, labels, reference system and resolution) remain unchanged. + """ + return apply_neighborhood( + data=self, + process=build_child_callback(process, parent_parameters=['data', 'context']), + size=size, + overlap=overlap, + context=context + ) + + @openeo_process + def apply_polygon(self, polygons, process, mask_value=UNSET, context=UNSET) -> ProcessBuilder: + """ + Apply a process to segments of the data cube + + :param self: A data cube. + :param polygons: A vector data cube containing at least one polygon. The provided vector data can be + one of the following: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a `Polygon` or + `MultiPolygon` geometry, or * a `FeatureCollection` containing at least one `Feature` with `Polygon` or + `MultiPolygon` geometries. * Empty geometries are ignored. + :param process: A process that accepts and returns a single data cube and is applied on each individual + sub data cube. The process may consist of multiple sub-processes. + :param mask_value: All pixels for which the point at the pixel center **does not** intersect with the + polygon are replaced with the given value, which defaults to `null` (no data). It can provide a + distinction between no data values within the polygon and masked pixels outside of it. + :param context: Additional data to be passed to the process. + + :return: A data cube with the newly computed values and the same dimensions. The dimension properties + (name, type, labels, reference system and resolution) remain unchanged. 
+ """ + return apply_polygon( + data=self, + polygons=polygons, + process=build_child_callback(process, parent_parameters=['data', 'context']), + mask_value=mask_value, + context=context + ) + + @openeo_process + def arccos(self) -> ProcessBuilder: + """ + Inverse cosine + + :param self: A number. + + :return: The computed angle in radians. + """ + return arccos(x=self) + + @openeo_process + def arcosh(self) -> ProcessBuilder: + """ + Inverse hyperbolic cosine + + :param self: A number. + + :return: The computed angle in radians. + """ + return arcosh(x=self) + + @openeo_process + def arcsin(self) -> ProcessBuilder: + """ + Inverse sine + + :param self: A number. + + :return: The computed angle in radians. + """ + return arcsin(x=self) + + @openeo_process + def arctan(self) -> ProcessBuilder: + """ + Inverse tangent + + :param self: A number. + + :return: The computed angle in radians. + """ + return arctan(x=self) + + @openeo_process + def arctan2(self, x) -> ProcessBuilder: + """ + Inverse tangent of two numbers + + :param self: A number to be used as the dividend. + :param x: A number to be used as the divisor. + + :return: The computed angle in radians. + """ + return arctan2(y=self, x=x) + + @openeo_process + def ard_normalized_radar_backscatter(self, elevation_model=UNSET, contributing_area=UNSET, ellipsoid_incidence_angle=UNSET, noise_removal=UNSET, options=UNSET) -> ProcessBuilder: + """ + CARD4L compliant SAR NRB generation + + :param self: The source data cube containing SAR input. + :param elevation_model: The digital elevation model to use. Set to `null` (the default) to allow the + back-end to choose, which will improve portability, but reduce reproducibility. + :param contributing_area: If set to `true`, a DEM-based local contributing area band named + `contributing_area` is added. The values are given in square meters. 
+ :param ellipsoid_incidence_angle: If set to `true`, an ellipsoidal incidence angle band named + `ellipsoid_incidence_angle` is added. The values are given in degrees. + :param noise_removal: If set to `false`, no noise removal is applied. Defaults to `true`, which removes + noise. + :param options: Proprietary options for the backscatter computations. Specifying proprietary options + will reduce portability. + + :return: Backscatter values expressed as gamma0 in linear scale. In addition to the bands + `contributing_area` and `ellipsoid_incidence_angle` that can optionally be added with corresponding + parameters, the following bands are always added to the data cube: - `mask`: A data mask that + indicates which values are valid (1), invalid (0) or contain no-data (null). - `local_incidence_angle`: + A band with DEM-based local incidence angles in degrees. The data returned is CARD4L compliant with + corresponding metadata. + """ + return ard_normalized_radar_backscatter( + data=self, + elevation_model=elevation_model, + contributing_area=contributing_area, + ellipsoid_incidence_angle=ellipsoid_incidence_angle, + noise_removal=noise_removal, + options=options + ) + + @openeo_process + def ard_surface_reflectance(self, atmospheric_correction_method, cloud_detection_method, elevation_model=UNSET, atmospheric_correction_options=UNSET, cloud_detection_options=UNSET) -> ProcessBuilder: + """ + CARD4L compliant Surface Reflectance generation + + :param self: The source data cube containing multi-spectral optical top of the atmosphere (TOA) + reflectances. There must be a single dimension of type `bands` available. + :param atmospheric_correction_method: The atmospheric correction method to use. + :param cloud_detection_method: The cloud detection method to use. Each method supports detecting + different atmospheric disturbances such as clouds, cloud shadows, aerosols, haze, ozone and/or water + vapour in optical imagery. 
+ :param elevation_model: The digital elevation model to use. Set to `null` (the default) to allow the + back-end to choose, which will improve portability, but reduce reproducibility. + :param atmospheric_correction_options: Proprietary options for the atmospheric correction method. + Specifying proprietary options will reduce portability. + :param cloud_detection_options: Proprietary options for the cloud detection method. Specifying + proprietary options will reduce portability. + + :return: Data cube containing bottom of atmosphere reflectances for each spectral band in the source + data cube, with atmospheric disturbances like clouds and cloud shadows removed. No-data values (null) + are directly set in the bands. Depending on the methods used, several additional bands will be added to + the data cube: Data cube containing bottom of atmosphere reflectances for each spectral band in the + source data cube, with atmospheric disturbances like clouds and cloud shadows removed. Depending on the + methods used, several additional bands will be added to the data cube: - `date` (optional): Specifies + per-pixel acquisition timestamps. - `incomplete-testing` (required): Identifies pixels with a value of + 1 for which the per-pixel tests (at least saturation, cloud and cloud shadows, see CARD4L specification + for details) have not all been successfully completed. Otherwise, the value is 0. - `saturation` + (required) / `saturation_{band}` (optional): Indicates where pixels in the input spectral bands are + saturated (1) or not (0). If the saturation is given per band, the band names are `saturation_{band}` + with `{band}` being the band name from the source data cube. - `cloud`, `shadow` (both + required),`aerosol`, `haze`, `ozone`, `water_vapor` (all optional): Indicates the probability of pixels + being an atmospheric disturbance such as clouds. All bands have values between 0 (clear) and 1, which + describes the probability that it is an atmospheric disturbance. 
- `snow-ice` (optional): Points to a + file that indicates whether a pixel is assessed as being snow/ice (1) or not (0). All values describe + the probability and must be between 0 and 1. - `land-water` (optional): Indicates whether a pixel is + assessed as being land (1) or water (0). All values describe the probability and must be between 0 and + 1. - `incidence-angle` (optional): Specifies per-pixel incidence angles in degrees. - `azimuth` + (optional): Specifies per-pixel azimuth angles in degrees. - `sun-azimuth` (optional): Specifies + per-pixel sun azimuth angles in degrees. - `sun-elevation` (optional): Specifies per-pixel sun elevation + angles in degrees. - `terrain-shadow` (optional): Indicates with a value of 1 whether a pixel is not + directly illuminated due to terrain shadowing. Otherwise, the value is 0. - `terrain-occlusion` + (optional): Indicates with a value of 1 whether a pixel is not visible to the sensor due to terrain + occlusion during off-nadir viewing. Otherwise, the value is 0. - `terrain-illumination` (optional): + Contains the coefficients used for terrain illumination correction, provided for each pixel. The data + returned is CARD4L compliant with corresponding metadata. + """ + return ard_surface_reflectance( + data=self, + atmospheric_correction_method=atmospheric_correction_method, + cloud_detection_method=cloud_detection_method, + elevation_model=elevation_model, + atmospheric_correction_options=atmospheric_correction_options, + cloud_detection_options=cloud_detection_options + ) + + @openeo_process + def array_append(self, value, label=UNSET) -> ProcessBuilder: + """ + Append a value to an array + + :param self: An array. + :param value: Value to append to the array. + :param label: If the given array is a labeled array, a new label for the new value should be given. If + not given or `null`, the array index as string is used as the label. If in any case the label exists, a + `LabelExists` exception is thrown. 
+ + :return: The new array with the value being appended. + """ + return array_append(data=self, value=value, label=label) + + @openeo_process + def array_apply(self, process, context=UNSET) -> ProcessBuilder: + """ + Apply a process to each array element + + :param self: An array. + :param process: A process that accepts and returns a single value and is applied on each individual + value in the array. The process may consist of multiple sub-processes and could, for example, consist + of processes such as ``absolute()`` or ``linear_scale_range()``. + :param context: Additional data to be passed to the process. + + :return: An array with the newly computed values. The number of elements is the same as for the + original array. + """ + return array_apply( + data=self, + process=build_child_callback(process, parent_parameters=['x', 'index', 'label', 'context']), + context=context + ) + + @openeo_process + def array_concat(self, array2) -> ProcessBuilder: + """ + Merge two arrays + + :param self: The first array. + :param array2: The second array. + + :return: The merged array. + """ + return array_concat(array1=self, array2=array2) + + @openeo_process + def array_contains(self, value) -> ProcessBuilder: + """ + Check whether the array contains a given value + + :param self: List to find the value in. + :param value: Value to find in `data`. If the value is `null`, this process always returns `false`. + + :return: `true` if the list contains the value, `false` otherwise. + """ + return array_contains(data=self, value=value) + + @openeo_process + def array_create(self=UNSET, repeat=UNSET) -> ProcessBuilder: + """ + Create an array + + :param self: A (native) array to fill the newly created array with. Defaults to an empty array. + :param repeat: The number of times the (native) array specified in `data` is repeatedly added after + each other to the new array being created. Defaults to `1`. + + :return: The newly created array. 
+ """ + return array_create(data=self, repeat=repeat) + + @openeo_process + def array_create_labeled(self, labels) -> ProcessBuilder: + """ + Create a labeled array + + :param self: An array of values to be used. + :param labels: An array of labels to be used. + + :return: The newly created labeled array. + """ + return array_create_labeled(data=self, labels=labels) + + @openeo_process + def array_element(self, index=UNSET, label=UNSET, return_nodata=UNSET) -> ProcessBuilder: + """ + Get an element from an array + + :param self: An array. + :param index: The zero-based index of the element to retrieve. + :param label: The label of the element to retrieve. Throws an `ArrayNotLabeled` exception, if the given + array is not a labeled array and this parameter is set. + :param return_nodata: By default this process throws an `ArrayElementNotAvailable` exception if the + index or label is invalid. If you want to return `null` instead, set this flag to `true`. + + :return: The value of the requested element. + """ + return array_element(data=self, index=index, label=label, return_nodata=return_nodata) + + @openeo_process + def array_filter(self, condition, context=UNSET) -> ProcessBuilder: + """ + Filter an array based on a condition + + :param self: An array. + :param condition: A condition that is evaluated against each value, index and/or label in the array. + Only the array elements for which the condition returns `true` are preserved. + :param context: Additional data to be passed to the condition. + + :return: An array filtered by the specified condition. The number of elements are less than or equal + compared to the original array. + """ + return array_filter( + data=self, + condition=build_child_callback(condition, parent_parameters=['x', 'index', 'label', 'context']), + context=context + ) + + @openeo_process + def array_find(self, value, reverse=UNSET) -> ProcessBuilder: + """ + Get the index for a value in an array + + :param self: List to find the value in. 
+ :param value: Value to find in `data`. If the value is `null`, this process returns always `null`. + :param reverse: By default, this process finds the index of the first match. To return the index of the + last match instead, set this flag to `true`. + + :return: The index of the first element with the specified value. If no element was found, `null` is + returned. + """ + return array_find(data=self, value=value, reverse=reverse) + + @openeo_process + def array_find_label(self, label) -> ProcessBuilder: + """ + Get the index for a label in a labeled array + + :param self: List to find the label in. + :param label: Label to find in `data`. + + :return: The index of the element with the specified label assigned. If no such label was found, `null` + is returned. + """ + return array_find_label(data=self, label=label) + + @openeo_process + def array_interpolate_linear(self) -> ProcessBuilder: + """ + One-dimensional linear interpolation for arrays + + :param self: An array of numbers and no-data values. If the given array is a labeled array, the labels + must have a natural/inherent label order and the process expects the labels to be sorted accordingly. + This is the default behavior in openEO for spatial and temporal dimensions. + + :return: An array with no-data values being replaced with interpolated values. If not at least 2 + numerical values are available in the array, the array stays the same. + """ + return array_interpolate_linear(data=self) + + @openeo_process + def array_labels(self) -> ProcessBuilder: + """ + Get the labels for an array + + :param self: An array. + + :return: The labels or indices as array. + """ + return array_labels(data=self) + + @openeo_process + def array_modify(self, values, index, length=UNSET) -> ProcessBuilder: + """ + Change the content of an array (remove, insert, update) + + :param self: The array to modify. + :param values: The values to insert into the `data` array. 
+ :param index: The index in the `data` array of the element to insert the value(s) before. If the index + is greater than the number of elements in the `data` array, the process throws an + `ArrayElementNotAvailable` exception. To insert after the last element, there are two options: 1. Use + the simpler processes ``array_append()`` to append a single value or ``array_concat()`` to append + multiple values. 2. Specify the number of elements in the array. You can retrieve the number of + elements with the process ``count()``, having the parameter `condition` set to `true`. + :param length: The number of elements in the `data` array to remove (or replace) starting from the + given index. If the array contains fewer elements, the process simply removes all elements up to the + end. + + :return: An array with values added, updated or removed. + """ + return array_modify(data=self, values=values, index=index, length=length) + + @openeo_process + def arsinh(self) -> ProcessBuilder: + """ + Inverse hyperbolic sine + + :param self: A number. + + :return: The computed angle in radians. + """ + return arsinh(x=self) + + @openeo_process + def artanh(self) -> ProcessBuilder: + """ + Inverse hyperbolic tangent + + :param self: A number. + + :return: The computed angle in radians. + """ + return artanh(x=self) + + @openeo_process + def atmospheric_correction(self, method, elevation_model=UNSET, options=UNSET) -> ProcessBuilder: + """ + Apply atmospheric correction + + :param self: Data cube containing multi-spectral optical top of atmosphere reflectances to be + corrected. + :param method: The atmospheric correction method to use. To get reproducible results, you have to set a + specific method. Set to `null` to allow the back-end to choose, which will improve portability, but + reduce reproducibility as you *may* get different results if you run the processes multiple times. + :param elevation_model: The digital elevation model to use. 
Set to `null` (the default) to allow the + back-end to choose, which will improve portability, but reduce reproducibility. + :param options: Proprietary options for the atmospheric correction method. Specifying proprietary + options will reduce portability. + + :return: Data cube containing bottom of atmosphere reflectances. + """ + return atmospheric_correction(data=self, method=method, elevation_model=elevation_model, options=options) + + @openeo_process + def between(self, min, max, exclude_max=UNSET) -> ProcessBuilder: + """ + Between comparison + + :param self: The value to check. + :param min: Lower boundary (inclusive) to check against. + :param max: Upper boundary (inclusive) to check against. + :param exclude_max: Exclude the upper boundary `max` if set to `true`. Defaults to `false`. + + :return: `true` if `x` is between the specified bounds, otherwise `false`. + """ + return between(x=self, min=min, max=max, exclude_max=exclude_max) + + @openeo_process + def ceil(self) -> ProcessBuilder: + """ + Round fractions up + + :param self: A number to round up. + + :return: The number rounded up. + """ + return ceil(x=self) + + @openeo_process + def climatological_normal(self, period, climatology_period=UNSET) -> ProcessBuilder: + """ + Compute climatology normals + + :param self: A data cube with exactly one temporal dimension. The data cube must span at least the + temporal interval specified in the parameter `climatology-period`. Seasonal periods may span two + consecutive years, e.g. temporal winter that includes months December, January and February. If the + required months before the actual climate period are available, the season is taken into account. If + not available, the first season is not taken into account and the seasonal mean is based on one year + less than the other seasonal normals. The incomplete season at the end of the last year is never taken + into account. + :param period: The time intervals to aggregate the average value for. 
The following pre-defined + frequencies are supported: * `day`: Day of the year * `month`: Month of the year * + `climatology-period`: The period specified in the `climatology-period`. * `season`: Three month periods of the + calendar seasons (December - February, March - May, June - August, September - November). * + `tropical-season`: Six month periods of the tropical seasons (November - April, May - October). + :param climatology_period: The climatology period as a closed temporal interval. The first element of + the array is the first year to be fully included in the temporal interval. The second element is the + last year to be fully included in the temporal interval. The default climatology period is from 1981 + until 2010 (both inclusive) right now, but this might be updated over time to what is commonly used in + climatology. If you want to keep your research reproducible, please explicitly specify a + period. + + :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference + system and resolution) remain unchanged, except for the resolution and dimension labels of the temporal + dimension. The temporal dimension has the following dimension labels: * `day`: `001` - `365` * + `month`: `01` - `12` * `climatology-period`: `climatology-period` * `season`: `djf` (December - + February), `mam` (March - May), `jja` (June - August), `son` (September - November) * + `tropical-season`: `ndjfma` (November - April), `mjjaso` (May - October) + """ + return climatological_normal(data=self, period=period, climatology_period=climatology_period) + + @openeo_process + def clip(self, min, max) -> ProcessBuilder: + """ + Clip a value between a minimum and a maximum + + :param self: A number. + :param min: Minimum value. If the value is lower than this value, the process will return the value of + this parameter. + :param max: Maximum value. 
If the value is greater than this value, the process will return the value + of this parameter. + + :return: The value clipped to the specified range. + """ + return clip(x=self, min=min, max=max) + + @openeo_process + def cloud_detection(self, method, options=UNSET) -> ProcessBuilder: + """ + Create cloud masks + + :param self: The source data cube containing multi-spectral optical top of the atmosphere (TOA) + reflectances on which to perform cloud detection. + :param method: The cloud detection method to use. To get reproducible results, you have to set a + specific method. Set to `null` to allow the back-end to choose, which will improve portability, but + reduce reproducibility as you *may* get different results if you run the processes multiple times. + :param options: Proprietary options for the cloud detection method. Specifying proprietary options will + reduce portability. + + :return: A data cube with bands for the atmospheric disturbances. Each of the masks contains values + between 0 and 1. The data cube has the same spatial and temporal dimensions as the source data cube and + a dimension that contains a dimension label for each of the supported/considered atmospheric + disturbance. + """ + return cloud_detection(data=self, method=method, options=options) + + @openeo_process + def constant(self) -> ProcessBuilder: + """ + Define a constant value + + :param self: The value of the constant. + + :return: The value of the constant. + """ + return constant(x=self) + + @openeo_process + def cos(self) -> ProcessBuilder: + """ + Cosine + + :param self: An angle in radians. + + :return: The computed cosine of `x`. + """ + return cos(x=self) + + @openeo_process + def cosh(self) -> ProcessBuilder: + """ + Hyperbolic cosine + + :param self: An angle in radians. + + :return: The computed hyperbolic cosine of `x`. 
+ """ + return cosh(x=self) + + @openeo_process + def count(self, condition=UNSET, context=UNSET) -> ProcessBuilder: + """ + Count the number of elements + + :param self: An array with elements of any data type. + :param condition: A condition consists of one or more processes, which in the end return a boolean + value. It is evaluated against each element in the array. An element is counted only if the condition + returns `true`. Defaults to count valid elements in a list (see ``is_valid()``). Setting this parameter + to boolean `true` counts all elements in the list. `false` is not a valid value for this parameter. + :param context: Additional data to be passed to the condition. + + :return: The counted number of elements. + """ + return count(data=self, condition=condition, context=context) + + @openeo_process + def create_data_cube(self) -> ProcessBuilder: + """ + Create an empty data cube + + :return: An empty data cube with no dimensions. + """ + return create_data_cube() + + @openeo_process + def cummax(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Cumulative maxima + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not and ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is set for all the following + elements. + + :return: An array with the computed cumulative maxima. + """ + return cummax(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def cummin(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Cumulative minima + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not and ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is set for all the following + elements. + + :return: An array with the computed cumulative minima. 
+ """ + return cummin(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def cumproduct(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Cumulative products + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not and ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is set for all the following + elements. + + :return: An array with the computed cumulative products. + """ + return cumproduct(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def cumsum(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Cumulative sums + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not and ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is set for all the following + elements. + + :return: An array with the computed cumulative sums. + """ + return cumsum(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def date_between(self, min, max, exclude_max=UNSET) -> ProcessBuilder: + """ + Between comparison for dates and times + + :param self: The value to check. + :param min: Lower boundary (inclusive) to check against. + :param max: Upper boundary (inclusive) to check against. + :param exclude_max: Exclude the upper boundary `max` if set to `true`. Defaults to `false`. + + :return: `true` if `x` is between the specified bounds, otherwise `false`. + """ + return date_between(x=self, min=min, max=max, exclude_max=exclude_max) + + @openeo_process + def date_difference(self, date2, unit=UNSET) -> ProcessBuilder: + """ + Computes the difference between two time instants + + :param self: The base date, optionally with a time component. + :param date2: The other date, optionally with a time component. + :param unit: The unit for the returned value. 
The following units are available: - millisecond - + second - leap seconds are ignored in computations. - minute - hour - day - month - year + + :return: Returns the difference between date1 and date2 in the given unit (seconds by default), + including a fractional part if required. For comparison purposes this means: - If `date1` < `date2`, + the returned value is positive. - If `date1` = `date2`, the returned value is 0. - If `date1` > + `date2`, the returned value is negative. + """ + return date_difference(date1=self, date2=date2, unit=unit) + + @openeo_process + def date_shift(self, value, unit) -> ProcessBuilder: + """ + Manipulates dates and times by addition or subtraction + + :param self: The date (and optionally time) to manipulate. If the given date doesn't include the time, + the process assumes that the time component is `00:00:00Z` (i.e. midnight, in UTC). The millisecond + part of the time is optional and defaults to `0` if not given. + :param value: The period of time in the unit given that is added (positive numbers) or subtracted + (negative numbers). The value `0` doesn't have any effect. + :param unit: The unit for the value given. The following pre-defined units are available: - + millisecond: Milliseconds - second: Seconds - leap seconds are ignored in computations. - minute: + Minutes - hour: Hours - day: Days - changes only the day part of a date - week: Weeks (equivalent + to 7 days) - month: Months - year: Years Manipulations with the unit `year`, `month`, `week` or `day` + never change the time. If any of the manipulations results in an invalid date or time, the + corresponding part is rounded down to the next valid date or time respectively. For example, adding a + month to `2020-01-31` would result in `2020-02-29`. + + :return: The manipulated date. If a time component was given in the parameter `date`, the time + component is returned with the date. 
+ """ + return date_shift(date=self, value=value, unit=unit) + + @openeo_process + def dimension_labels(self, dimension) -> ProcessBuilder: + """ + Get the dimension labels + + :param self: The data cube. + :param dimension: The name of the dimension to get the labels for. + + :return: The labels as an array. + """ + return dimension_labels(data=self, dimension=dimension) + + @openeo_process + def divide(self, y) -> ProcessBuilder: + """ + Division of two numbers + + :param self: The dividend. + :param y: The divisor. + + :return: The computed result. + """ + return divide(x=self, y=y) + + @openeo_process + def drop_dimension(self, name) -> ProcessBuilder: + """ + Remove a dimension + + :param self: The data cube to drop a dimension from. + :param name: Name of the dimension to drop. + + :return: A data cube without the specified dimension. The number of dimensions decreases by one, but + the dimension properties (name, type, labels, reference system and resolution) for all other dimensions + remain unchanged. + """ + return drop_dimension(data=self, name=name) + + @openeo_process + def e(self) -> ProcessBuilder: + """ + Euler's number (e) + + :return: The numerical value of Euler's number. + """ + return e() + + @openeo_process + def eq(self, y, delta=UNSET, case_sensitive=UNSET) -> ProcessBuilder: + """ + Equal to comparison + + :param self: First operand. + :param y: Second operand. + :param delta: Only applicable for comparing two numbers. If this optional parameter is set to a + positive non-zero number the equality of two numbers is checked against a delta value. This is + especially useful to circumvent problems with floating-point inaccuracy in machine-based computation. + This option is basically an alias for the following computation: `lte(abs(minus([x, y])), delta)` + :param case_sensitive: Only applicable for comparing two strings. Case sensitive comparison can be + disabled by setting this parameter to `false`.
+ + :return: `true` if `x` is equal to `y`, `null` if any operand is `null`, otherwise `false`. + """ + return eq(x=self, y=y, delta=delta, case_sensitive=case_sensitive) + + @openeo_process + def exp(self) -> ProcessBuilder: + """ + Exponentiation to the base e + + :param self: The numerical exponent. + + :return: The computed value for *e* raised to the power of `p`. + """ + return exp(p=self) + + @openeo_process + def extrema(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Minimum and maximum values + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that an array with two `null` values is + returned if any value is such a value. + + :return: An array containing the minimum and maximum values for the specified numbers. The first + element is the minimum, the second element is the maximum. If the input array is empty both elements + are set to `null`. + """ + return extrema(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def filter_bands(self, bands=UNSET, wavelengths=UNSET) -> ProcessBuilder: + """ + Filter the bands by names + + :param self: A data cube with bands. + :param bands: A list of band names. Either the unique band name (metadata field `name` in bands) or one + of the common band names (metadata field `common_name` in bands). If the unique band name and the + common name conflict, the unique band name has a higher priority. The order of the specified array + defines the order of the bands in the data cube. If multiple bands match a common name, all matched + bands are included in the original order. + :param wavelengths: A list of sub-lists with each sub-list consisting of two elements. The first + element is the minimum wavelength and the second element is the maximum wavelength. Wavelengths are + specified in micrometers (μm). 
The order of the specified array defines the order of the bands in the + data cube. If multiple bands match the wavelengths, all matched bands are included in the original + order. + + :return: A data cube limited to a subset of its original bands. The dimensions and dimension properties + (name, type, labels, reference system and resolution) remain unchanged, except that the dimension of + type `bands` has less (or the same) dimension labels. + """ + return filter_bands(data=self, bands=bands, wavelengths=wavelengths) + + @openeo_process + def filter_bbox(self, extent) -> ProcessBuilder: + """ + Spatial filter using a bounding box + + :param self: A data cube. + :param extent: A bounding box, which may include a vertical axis (see `base` and `height`). + + :return: A data cube restricted to the bounding box. The dimensions and dimension properties (name, + type, labels, reference system and resolution) remain unchanged, except that the spatial dimensions + have less (or the same) dimension labels. + """ + return filter_bbox(data=self, extent=extent) + + @openeo_process + def filter_labels(self, condition, dimension, context=UNSET) -> ProcessBuilder: + """ + Filter dimension labels based on a condition + + :param self: A data cube. + :param condition: A condition that is evaluated against each dimension label in the specified + dimension. A dimension label and the corresponding data is preserved for the given dimension, if the + condition returns `true`. + :param dimension: The name of the dimension to filter on. Fails with a `DimensionNotAvailable` + exception if the specified dimension does not exist. + :param context: Additional data to be passed to the condition. + + :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference + system and resolution) remain unchanged, except that the given dimension has less (or the same) + dimension labels. 
+ """ + return filter_labels( + data=self, + condition=build_child_callback(condition, parent_parameters=['value', 'context']), + dimension=dimension, + context=context + ) + + @openeo_process + def filter_spatial(self, geometries) -> ProcessBuilder: + """ + Spatial filter raster data cubes using geometries + + :param self: A raster data cube. + :param geometries: One or more geometries used for filtering, given as GeoJSON or vector data cube. If + multiple geometries are provided, the union of them is used. Empty geometries are ignored. Limits the + data cube to the bounding box of the given geometries. No implicit masking gets applied. To mask the + pixels of the data cube use ``mask_polygon()``. + + :return: A raster data cube restricted to the specified geometries. The dimensions and dimension + properties (name, type, labels, reference system and resolution) remain unchanged, except that the + spatial dimensions have less (or the same) dimension labels. + """ + return filter_spatial(data=self, geometries=geometries) + + @openeo_process + def filter_temporal(self, extent, dimension=UNSET) -> ProcessBuilder: + """ + Temporal filter based on temporal intervals + + :param self: A data cube. + :param extent: Left-closed temporal interval, i.e. an array with exactly two elements: 1. The first + element is the start of the temporal interval. The specified time instant is **included** in the + interval. 2. The second element is the end of the temporal interval. The specified time instant is + **excluded** from the interval. The second element must always be greater/later than the first + element. Otherwise, a `TemporalExtentEmpty` exception is thrown. Also supports unbounded intervals by + setting one of the boundaries to `null`, but never both. + :param dimension: The name of the temporal dimension to filter on. If no specific dimension is + specified, the filter applies to all temporal dimensions. 
Fails with a `DimensionNotAvailable` + exception if the specified dimension does not exist. + + :return: A data cube restricted to the specified temporal extent. The dimensions and dimension + properties (name, type, labels, reference system and resolution) remain unchanged, except that the + temporal dimensions (determined by the `dimension` parameter) may have fewer dimension labels. + """ + return filter_temporal(data=self, extent=extent, dimension=dimension) + + @openeo_process + def filter_vector(self, geometries, relation=UNSET) -> ProcessBuilder: + """ + Spatial vector filter using geometries + + :param self: A vector data cube with the candidate geometries. + :param geometries: One or more base geometries used for filtering, given as vector data cube. If + multiple base geometries are provided, the union of them is used. + :param relation: The spatial filter predicate for comparing the geometries provided through (a) + `geometries` (base geometries) and (b) `data` (candidate geometries). + + :return: A vector data cube restricted to the specified geometries. The dimensions and dimension + properties (name, type, labels, reference system and resolution) remain unchanged, except that the + geometries dimension has fewer (or the same) dimension labels. + """ + return filter_vector(data=self, geometries=geometries, relation=relation) + + @openeo_process + def first(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + First element + + :param self: An array with elements of any data type. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if the first value is + such a value. + + :return: The first element of the input array.
+ """ + return first(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def fit_curve(self, parameters, function, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Curve fitting + + :param self: A labeled array, the labels correspond to the variable `y` and the values correspond to + the variable `x`. + :param parameters: Defines the number of parameters for the model function and provides an initial + guess for them. At least one parameter is required. + :param function: The model function. It must take the parameters to fit as array through the first + argument and the independent variable `x` as the second argument. It is recommended to store the model + function as a user-defined process on the back-end to be able to re-use the model function with the + computed optimal values for the parameters afterwards. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is passed to the model function. + + :return: An array with the optimal values for the parameters. + """ + return fit_curve( + data=self, + parameters=parameters, + function=build_child_callback(function, parent_parameters=['x', 'parameters']), + ignore_nodata=ignore_nodata + ) + + @openeo_process + def flatten_dimensions(self, dimensions, target_dimension, label_separator=UNSET) -> ProcessBuilder: + """ + Combine multiple dimensions into a single dimension + + :param self: A data cube. + :param dimensions: The names of the dimensions to combine. The order of the array defines the order in + which the dimension labels and values are combined (see the example in the process description). Fails + with a `DimensionNotAvailable` exception if at least one of the specified dimensions does not exist. + :param target_dimension: The name of the new target dimension. A new dimension will be created with + the given name and type `other` (see ``add_dimension()``).
Fails with a `TargetDimensionExists` + exception if a dimension with the specified name exists. + :param label_separator: The string that will be used as a separator for the concatenated dimension + labels. To unambiguously revert the dimension labels with the process ``unflatten_dimension()``, the + given string must not be contained in any of the dimension labels. + + :return: A data cube with the new shape. The dimension properties (name, type, labels, reference system + and resolution) for all other dimensions remain unchanged. + """ + return flatten_dimensions(data=self, dimensions=dimensions, target_dimension=target_dimension, label_separator=label_separator) + + @openeo_process + def floor(self) -> ProcessBuilder: + """ + Round fractions down + + :param self: A number to round down. + + :return: The number rounded down. + """ + return floor(x=self) + + @openeo_process + def gt(self, y) -> ProcessBuilder: + """ + Greater than comparison + + :param self: First operand. + :param y: Second operand. + + :return: `true` if `x` is strictly greater than `y` or `null` if any operand is `null`, otherwise + `false`. + """ + return gt(x=self, y=y) + + @openeo_process + def gte(self, y) -> ProcessBuilder: + """ + Greater than or equal to comparison + + :param self: First operand. + :param y: Second operand. + + :return: `true` if `x` is greater than or equal to `y`, `null` if any operand is `null`, otherwise + `false`. + """ + return gte(x=self, y=y) + + @openeo_process + def if_(self, accept, reject=UNSET) -> ProcessBuilder: + """ + If-Then-Else conditional + + :param self: A boolean value. + :param accept: A value that is returned if the boolean value is `true`. + :param reject: A value that is returned if the boolean value is **not** `true`. Defaults to `null`. + + :return: Either the `accept` or `reject` argument depending on the given boolean value. 
+ """ + return if_(value=self, accept=accept, reject=reject) + + @openeo_process + def inspect(self, message=UNSET, code=UNSET, level=UNSET) -> ProcessBuilder: + """ + Add information to the logs + + :param self: Data to log. + :param message: A message to send in addition to the data. + :param code: A label to help identify one or more log entries originating from this process in the list + of all log entries. It can help to group or filter log entries and is usually not unique. + :param level: The severity level of this message, defaults to `info`. + + :return: The data as passed to the `data` parameter without any modification. + """ + return inspect(data=self, message=message, code=code, level=level) + + @openeo_process + def int(self) -> ProcessBuilder: + """ + Integer part of a number + + :param self: A number. + + :return: Integer part of the number. + """ + return int(x=self) + + @openeo_process + def is_infinite(self) -> ProcessBuilder: + """ + Value is an infinite number + + :param self: The data to check. + + :return: `true` if the data is an infinite number, otherwise `false`. + """ + return is_infinite(x=self) + + @openeo_process + def is_nan(self) -> ProcessBuilder: + """ + Value is not a number + + :param self: The data to check. + + :return: Returns `true` for `NaN` and all non-numeric data types, otherwise returns `false`. + """ + return is_nan(x=self) + + @openeo_process + def is_nodata(self) -> ProcessBuilder: + """ + Value is a no-data value + + :param self: The data to check. + + :return: `true` if the data is a no-data value, otherwise `false`. + """ + return is_nodata(x=self) + + @openeo_process + def is_valid(self) -> ProcessBuilder: + """ + Value is valid data + + :param self: The data to check. + + :return: `true` if the data is valid, otherwise `false`. + """ + return is_valid(x=self) + + @openeo_process + def last(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Last element + + :param self: An array with elements of any data type. 
+ :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if the last value is + such a value. + + :return: The last element of the input array. + """ + return last(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def linear_scale_range(self, inputMin, inputMax, outputMin=UNSET, outputMax=UNSET) -> ProcessBuilder: + """ + Linear transformation between two ranges + + :param self: A number to transform. The number gets clipped to the bounds specified in `inputMin` and + `inputMax`. + :param inputMin: Minimum value the input can obtain. + :param inputMax: Maximum value the input can obtain. + :param outputMin: Minimum value of the desired output range. + :param outputMax: Maximum value of the desired output range. + + :return: The transformed number. + """ + return linear_scale_range(x=self, inputMin=inputMin, inputMax=inputMax, outputMin=outputMin, outputMax=outputMax) + + @openeo_process + def ln(self) -> ProcessBuilder: + """ + Natural logarithm + + :param self: A number to compute the natural logarithm for. + + :return: The computed natural logarithm. + """ + return ln(x=self) + + @openeo_process + def load_collection(self, spatial_extent, temporal_extent, bands=UNSET, properties=UNSET) -> ProcessBuilder: + """ + Load a collection + + :param self: The collection id. + :param spatial_extent: Limits the data to load from the collection to the specified bounding box or + polygons. * For raster data, the process loads the pixel into the data cube if the point at the pixel + center intersects with the bounding box or any of the polygons (as defined in the Simple Features + standard by the OGC). * For vector data, the process loads the geometry into the data cube if the + geometry is fully *within* the bounding box or any of the polygons (as defined in the Simple Features + standard by the OGC). 
Empty geometries may only be in the data cube if no spatial extent has been + provided. The GeoJSON can be one of the following feature types: * A `Polygon` or `MultiPolygon` + geometry, * a `Feature` with a `Polygon` or `MultiPolygon` geometry, or * a `FeatureCollection` + containing at least one `Feature` with `Polygon` or `MultiPolygon` geometries. * Empty geometries are + ignored. Set this parameter to `null` to set no limit for the spatial extent. Be careful with this + when loading large datasets! It is recommended to use this parameter instead of using ``filter_bbox()`` + or ``filter_spatial()`` directly after loading unbounded data. + :param temporal_extent: Limits the data to load from the collection to the specified left-closed + temporal interval. Applies to all temporal dimensions. The interval has to be specified as an array + with exactly two elements: 1. The first element is the start of the temporal interval. The specified + time instant is **included** in the interval. 2. The second element is the end of the temporal + interval. The specified time instant is **excluded** from the interval. The second element must always + be greater/later than the first element. Otherwise, a `TemporalExtentEmpty` exception is thrown. Also + supports unbounded intervals by setting one of the boundaries to `null`, but never both. Set this + parameter to `null` to set no limit for the temporal extent. Be careful with this when loading large + datasets! It is recommended to use this parameter instead of using ``filter_temporal()`` directly after + loading unbounded data. + :param bands: Only adds the specified bands into the data cube so that bands that don't match the list + of band names are not available. Applies to all dimensions of type `bands`. Either the unique band + name (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in + bands) can be specified. 
If the unique band name and the common name conflict, the unique band name has + a higher priority. The order of the specified array defines the order of the bands in the data cube. + If multiple bands match a common name, all matched bands are included in the original order. It is + recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded + data. + :param properties: Limits the data by metadata properties to include only data in the data cube which + all given conditions return `true` for (AND operation). Specify key-value-pairs with the key being the + name of the metadata property, which can be retrieved with the openEO Data Discovery for Collections. + The value must be a condition (user-defined process) to be evaluated against the collection metadata, + see the example. + + :return: A data cube for further processing. The dimensions and dimension properties (name, type, + labels, reference system and resolution) correspond to the collection's metadata, but the dimension + labels are restricted as specified in the parameters. + """ + return load_collection(id=self, spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=bands, properties=properties) + + @openeo_process + def load_geojson(self, properties=UNSET) -> ProcessBuilder: + """ + Converts GeoJSON into a vector data cube + + :param self: A GeoJSON object to convert into a vector data cube. The GeoJSON type `GeometryCollection` + is not supported. Each geometry in the GeoJSON data results in a dimension label in the `geometries` + dimension. + :param properties: A list of properties from the GeoJSON file to construct an additional dimension + from. A new dimension with the name `properties` and type `other` is created if at least one property + is provided. Only applies for GeoJSON Features and FeatureCollections. Missing values are generally set + to no-data (`null`). 
Depending on the number of properties provided, the process creates the dimension + differently: - Single property with scalar values: A single dimension label with the name of the + property and a single value per geometry. - Single property of type array: The dimension labels + correspond to the array indices. There are as many values and labels per geometry as there are for the + largest array. - Multiple properties with scalar values: The dimension labels correspond to the + property names. There are as many values and labels per geometry as there are properties provided here. + + :return: A vector data cube containing the geometries, either one or two dimensional. + """ + return load_geojson(data=self, properties=properties) + + @openeo_process + def load_ml_model(self) -> ProcessBuilder: + """ + Load a ML model + + :param self: The STAC Item to load the machine learning model from. The STAC Item must implement the + `ml-model` extension. + + :return: A machine learning model to be used with machine learning processes such as + ``predict_random_forest()``. + """ + return load_ml_model(id=self) + + @openeo_process + def load_result(self, spatial_extent=UNSET, temporal_extent=UNSET, bands=UNSET) -> ProcessBuilder: + """ + Load batch job results + + :param self: The id of a batch job with results. + :param spatial_extent: Limits the data to load from the batch job result to the specified bounding box + or polygons. * For raster data, the process loads the pixel into the data cube if the point at the + pixel center intersects with the bounding box or any of the polygons (as defined in the Simple Features + standard by the OGC). * For vector data, the process loads the geometry into the data cube if the + geometry is fully within the bounding box or any of the polygons (as defined in the Simple Features + standard by the OGC). Empty geometries may only be in the data cube if no spatial extent has been + provided.
The GeoJSON can be one of the following feature types: * A `Polygon` or `MultiPolygon` + geometry, * a `Feature` with a `Polygon` or `MultiPolygon` geometry, or * a `FeatureCollection` + containing at least one `Feature` with `Polygon` or `MultiPolygon` geometries. Set this parameter to + `null` to set no limit for the spatial extent. Be careful with this when loading large datasets! It is + recommended to use this parameter instead of using ``filter_bbox()`` or ``filter_spatial()`` directly + after loading unbounded data. + :param temporal_extent: Limits the data to load from the batch job result to the specified left-closed + temporal interval. Applies to all temporal dimensions. The interval has to be specified as an array + with exactly two elements: 1. The first element is the start of the temporal interval. The specified + instance in time is **included** in the interval. 2. The second element is the end of the temporal + interval. The specified instance in time is **excluded** from the interval. The specified temporal + strings follow [RFC 3339](https://www.rfc-editor.org/rfc/rfc3339.html). Also supports open intervals by + setting one of the boundaries to `null`, but never both. Set this parameter to `null` to set no limit + for the temporal extent. Be careful with this when loading large datasets! It is recommended to use + this parameter instead of using ``filter_temporal()`` directly after loading unbounded data. + :param bands: Only adds the specified bands into the data cube so that bands that don't match the list + of band names are not available. Applies to all dimensions of type `bands`. Either the unique band + name (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in + bands) can be specified. If the unique band name and the common name conflict, the unique band name has + a higher priority. The order of the specified array defines the order of the bands in the data cube. 
+ If multiple bands match a common name, all matched bands are included in the original order. It is + recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded + data. + + :return: A data cube for further processing. + """ + return load_result(id=self, spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=bands) + + @openeo_process + def load_stac(self, spatial_extent=UNSET, temporal_extent=UNSET, bands=UNSET, properties=UNSET) -> ProcessBuilder: + """ + Loads data from STAC + + :param self: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a + specific STAC API Collection that allows to filter items and to download assets. This includes batch + job results, which itself are compliant to STAC. For external URLs, authentication details such as API + keys or tokens may need to be included in the URL. Batch job results can be specified in two ways: - + For Batch job results at the same back-end, a URL pointing to the corresponding batch job results + endpoint should be provided. The URL usually ends with `/jobs/{id}/results` and `{id}` is the + corresponding batch job ID. - For external results, a signed URL must be provided. Not all back-ends + support signed URLs, which are provided as a link with the link relation `canonical` in the batch job + result metadata. + :param spatial_extent: Limits the data to load to the specified bounding box or polygons. * For raster + data, the process loads the pixel into the data cube if the point at the pixel center intersects with + the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). * For + vector data, the process loads the geometry into the data cube if the geometry is fully within the + bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). Empty + geometries may only be in the data cube if no spatial extent has been provided. 
The GeoJSON can be one + of the following feature types: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a + `Polygon` or `MultiPolygon` geometry, or * a `FeatureCollection` containing at least one `Feature` with + `Polygon` or `MultiPolygon` geometries. Set this parameter to `null` to set no limit for the spatial + extent. Be careful with this when loading large datasets! It is recommended to use this parameter + instead of using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data. + :param temporal_extent: Limits the data to load to the specified left-closed temporal interval. Applies + to all temporal dimensions. The interval has to be specified as an array with exactly two elements: 1. + The first element is the start of the temporal interval. The specified instance in time is **included** + in the interval. 2. The second element is the end of the temporal interval. The specified instance in + time is **excluded** from the interval. The second element must always be greater/later than the first + element. Otherwise, a `TemporalExtentEmpty` exception is thrown. Also supports open intervals by + setting one of the boundaries to `null`, but never both. Set this parameter to `null` to set no limit + for the temporal extent. Be careful with this when loading large datasets! It is recommended to use + this parameter instead of using ``filter_temporal()`` directly after loading unbounded data. + :param bands: Only adds the specified bands into the data cube so that bands that don't match the list + of band names are not available. Applies to all dimensions of type `bands`. Either the unique band + name (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in + bands) can be specified. If the unique band name and the common name conflict, the unique band name has + a higher priority. The order of the specified array defines the order of the bands in the data cube. 
+ If multiple bands match a common name, all matched bands are included in the original order. It is + recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded + data. + :param properties: Limits the data by metadata properties to include only data in the data cube which + all given conditions return `true` for (AND operation). Specify key-value-pairs with the key being the + name of the metadata property, which can be retrieved with the openEO Data Discovery for Collections. + The value must be a condition (user-defined process) to be evaluated against a STAC API. This parameter + is not supported for static STAC. + + :return: A data cube for further processing. + """ + return load_stac(url=self, spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=bands, properties=properties) + + @openeo_process + def load_uploaded_files(self, format, options=UNSET) -> ProcessBuilder: + """ + Load files from the user workspace + + :param self: The files to read. Folders can't be specified, specify all files instead. An exception is + thrown if a file can't be read. + :param format: The file format to read from. It must be one of the values that the server reports as + supported input file formats, which usually correspond to the short GDAL/OGR codes. If the format is + not suitable for loading the data, a `FormatUnsuitable` exception will be thrown. This parameter is + *case insensitive*. + :param options: The file format parameters to be used to read the files. Must correspond to the + parameters that the server reports as supported parameters for the chosen `format`. The parameter names + and valid values usually correspond to the GDAL/OGR format options. + + :return: A data cube for further processing. 
+ """ + return load_uploaded_files(paths=self, format=format, options=options) + + @openeo_process + def load_url(self, format, options=UNSET) -> ProcessBuilder: + """ + Load data from a URL + + :param self: The URL to read from. Authentication details such as API keys or tokens may need to be + included in the URL. + :param format: The file format to use when loading the data. It must be one of the values that the + server reports as supported input file formats, which usually correspond to the short GDAL/OGR codes. + If the format is not suitable for loading the data, a `FormatUnsuitable` exception will be thrown. This + parameter is *case insensitive*. + :param options: The file format parameters to use when reading the data. Must correspond to the + parameters that the server reports as supported parameters for the chosen `format`. The parameter names + and valid values usually correspond to the GDAL/OGR format options. + + :return: A data cube for further processing. + """ + return load_url(url=self, format=format, options=options) + + @openeo_process + def log(self, base) -> ProcessBuilder: + """ + Logarithm to a base + + :param self: A number to compute the logarithm for. + :param base: The numerical base. + + :return: The computed logarithm. + """ + return log(x=self, base=base) + + @openeo_process + def lt(self, y) -> ProcessBuilder: + """ + Less than comparison + + :param self: First operand. + :param y: Second operand. + + :return: `true` if `x` is strictly less than `y`, `null` if any operand is `null`, otherwise `false`. + """ + return lt(x=self, y=y) + + @openeo_process + def lte(self, y) -> ProcessBuilder: + """ + Less than or equal to comparison + + :param self: First operand. + :param y: Second operand. + + :return: `true` if `x` is less than or equal to `y`, `null` if any operand is `null`, otherwise + `false`. 
+ """ + return lte(x=self, y=y) + + @openeo_process + def mask(self, mask, replacement=UNSET) -> ProcessBuilder: + """ + Apply a raster mask + + :param self: A raster data cube. + :param mask: A mask as a raster data cube. Every pixel in `data` must have a corresponding element in + `mask`. + :param replacement: The value used to replace masked values with. + + :return: A masked raster data cube with the same dimensions. The dimension properties (name, type, + labels, reference system and resolution) remain unchanged. + """ + return mask(data=self, mask=mask, replacement=replacement) + + @openeo_process + def mask_polygon(self, mask, replacement=UNSET, inside=UNSET) -> ProcessBuilder: + """ + Apply a polygon mask + + :param self: A raster data cube. + :param mask: A GeoJSON object or a vector data cube containing at least one polygon. The provided + vector data can be one of the following: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with + a `Polygon` or `MultiPolygon` geometry, or * a `FeatureCollection` containing at least one `Feature` + with `Polygon` or `MultiPolygon` geometries. * Empty geometries are ignored. + :param replacement: The value used to replace masked values with. + :param inside: If set to `true` all pixels for which the point at the pixel center **does** intersect + with any polygon are replaced. + + :return: A masked raster data cube with the same dimensions. The dimension properties (name, type, + labels, reference system and resolution) remain unchanged. + """ + return mask_polygon(data=self, mask=mask, replacement=replacement, inside=inside) + + @openeo_process + def max(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Maximum value + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if any value is such a + value. + + :return: The maximum value. 
+ """ + return max(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def mean(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Arithmetic mean (average) + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if any value is such a + value. + + :return: The computed arithmetic mean. + """ + return mean(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def median(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Statistical median + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if any value is such a + value. + + :return: The computed statistical median. + """ + return median(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def merge_cubes(self, cube2, overlap_resolver=UNSET, context=UNSET) -> ProcessBuilder: + """ + Merge two data cubes + + :param self: The base data cube. + :param cube2: The other data cube to be merged with the base data cube. + :param overlap_resolver: A reduction operator that resolves the conflict if the data overlaps. The + reducer must return a value of the same data type as the input values are. The reduction operator may + be a single process such as ``multiply()`` or consist of multiple sub-processes. `null` (the default) + can be specified if no overlap resolver is required. + :param context: Additional data to be passed to the overlap resolver. + + :return: The merged data cube. See the process description for details regarding the dimensions and + dimension properties (name, type, labels, reference system and resolution). 
+ """ + return merge_cubes( + cube1=self, + cube2=cube2, + overlap_resolver=(build_child_callback(overlap_resolver, parent_parameters=['x', 'y', 'context']) if overlap_resolver not in [None, UNSET] else overlap_resolver), + context=context + ) + + @openeo_process + def min(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Minimum value + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if any value is such a + value. + + :return: The minimum value. + """ + return min(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def mod(self, y) -> ProcessBuilder: + """ + Modulo + + :param self: A number to be used as the dividend. + :param y: A number to be used as the divisor. + + :return: The remainder after division. + """ + return mod(x=self, y=y) + + @openeo_process + def multiply(self, y) -> ProcessBuilder: + """ + Multiplication of two numbers + + :param self: The multiplier. + :param y: The multiplicand. + + :return: The computed product of the two numbers. + """ + return multiply(x=self, y=y) + + @openeo_process + def nan(self) -> ProcessBuilder: + """ + Not a Number (NaN) + + :return: Returns `NaN`. + """ + return nan() + + @openeo_process + def ndvi(self, nir=UNSET, red=UNSET, target_band=UNSET) -> ProcessBuilder: + """ + Normalized Difference Vegetation Index + + :param self: A raster data cube with two bands that have the common names `red` and `nir` assigned. + :param nir: The name of the NIR band. Defaults to the band that has the common name `nir` assigned. + Either the unique band name (metadata field `name` in bands) or one of the common band names (metadata + field `common_name` in bands) can be specified. If the unique band name and the common name conflict, + the unique band name has a higher priority. + :param red: The name of the red band. 
Defaults to the band that has the common name `red` assigned. + Either the unique band name (metadata field `name` in bands) or one of the common band names (metadata + field `common_name` in bands) can be specified. If the unique band name and the common name conflict, + the unique band name has a higher priority. + :param target_band: By default, the dimension of type `bands` is dropped. To keep the dimension specify + a new band name in this parameter so that a new dimension label with the specified name will be added + for the computed values. + + :return: A raster data cube containing the computed NDVI values. The structure of the data cube differs + depending on the value passed to `target_band`: * `target_band` is `null`: The data cube does not + contain the dimension of type `bands`, the number of dimensions decreases by one. The dimension + properties (name, type, labels, reference system and resolution) for all other dimensions remain + unchanged. * `target_band` is a string: The data cube keeps the same dimensions. The dimension + properties remain unchanged, but the number of dimension labels for the dimension of type `bands` + increases by one. The additional label is named as specified in `target_band`. + """ + return ndvi(data=self, nir=nir, red=red, target_band=target_band) + + @openeo_process + def neq(self, y, delta=UNSET, case_sensitive=UNSET) -> ProcessBuilder: + """ + Not equal to comparison + + :param self: First operand. + :param y: Second operand. + :param delta: Only applicable for comparing two numbers. If this optional parameter is set to a + positive non-zero number the non-equality of two numbers is checked against a delta value. This is + especially useful to circumvent problems with floating-point inaccuracy in machine-based computation. + This option is basically an alias for the following computation: `gt(abs(minus([x, y]), delta)` + :param case_sensitive: Only applicable for comparing two strings. 
Case sensitive comparison can be + disabled by setting this parameter to `false`. + + :return: `true` if `x` is *not* equal to `y`, `null` if any operand is `null`, otherwise `false`. + """ + return neq(x=self, y=y, delta=delta, case_sensitive=case_sensitive) + + @openeo_process + def normalized_difference(self, y) -> ProcessBuilder: + """ + Normalized difference + + :param self: The value for the first band. + :param y: The value for the second band. + + :return: The computed normalized difference. + """ + return normalized_difference(x=self, y=y) + + @openeo_process + def not_(self) -> ProcessBuilder: + """ + Inverting a boolean + + :param self: Boolean value to invert. + + :return: Inverted boolean value. + """ + return not_(x=self) + + @openeo_process + def or_(self, y) -> ProcessBuilder: + """ + Logical OR + + :param self: A boolean value. + :param y: A boolean value. + + :return: Boolean result of the logical OR. + """ + return or_(x=self, y=y) + + @openeo_process + def order(self, asc=UNSET, nodata=UNSET) -> ProcessBuilder: + """ + Get the order of array elements + + :param self: An array to compute the order for. + :param asc: The default sort order is ascending, with smallest values first. To sort in reverse + (descending) order, set this parameter to `false`. + :param nodata: Controls the handling of no-data values (`null`). By default, they are removed. If set + to `true`, missing values in the data are put last; if set to `false`, they are put first. + + :return: The computed permutation. + """ + return order(data=self, asc=asc, nodata=nodata) + + @openeo_process + def pi(self) -> ProcessBuilder: + """ + Pi (π) + + :return: The numerical value of Pi. + """ + return pi() + + @openeo_process + def power(self, p) -> ProcessBuilder: + """ + Exponentiation + + :param self: The numerical base. + :param p: The numerical exponent. + + :return: The computed value for `base` raised to the power of `p`. 
+ """ + return power(base=self, p=p) + + @openeo_process + def predict_curve(self, function, dimension, labels=UNSET) -> ProcessBuilder: + """ + Predict values + + :param self: A data cube with optimal values, e.g. computed by the process ``fit_curve()``. + :param function: The model function. It must take the parameters to fit as array through the first + argument and the independent variable `x` as the second argument. It is recommended to store the model + function as a user-defined process on the back-end. + :param dimension: The name of the dimension for predictions. + :param labels: The labels to predict values for. If no labels are given, predicts values only for no- + data (`null`) values in the data cube. + + :return: A data cube with the predicted values with the provided dimension `dimension` having as many + labels as provided through `labels`. + """ + return predict_curve( + parameters=self, + function=build_child_callback(function, parent_parameters=['x', 'parameters']), + dimension=dimension, + labels=labels + ) + + @openeo_process + def predict_random_forest(self, model) -> ProcessBuilder: + """ + Predict values based on a Random Forest model + + :param self: An array of numbers. + :param model: A model object that can be trained with the processes ``fit_regr_random_forest()`` + (regression) and ``fit_class_random_forest()`` (classification). + + :return: The predicted value. Returns `null` if any of the given values in the array is a no-data + value. + """ + return predict_random_forest(data=self, model=model) + + @openeo_process + def product(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Compute the product by multiplying numbers + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if any value is such a + value. 
+ + :return: The computed product of the sequence of numbers. + """ + return product(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def quantiles(self, probabilities=UNSET, q=UNSET, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Quantiles + + :param self: An array of numbers. + :param probabilities: Quantiles to calculate. Either a list of probabilities or the number of + intervals: * Provide an array with a sorted list of probabilities in ascending order to calculate + quantiles for. The probabilities must be between 0 and 1 (inclusive). If not sorted in ascending order, + an `AscendingProbabilitiesRequired` exception is thrown. * Provide an integer to specify the number of + intervals to calculate quantiles for. Calculates q-quantiles with equal-sized intervals. + :param q: Number of intervals to calculate quantiles for. Calculates q-quantiles with equal-sized + intervals. This parameter has been **deprecated**. Please use the parameter `probabilities` instead. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that an array with `null` values is returned + if any element is such a value. + + :return: An array with the computed quantiles. The list has either * as many elements as the given + list of `probabilities` had or * *`q`-1* elements. If the input array is empty the resulting array is + filled with as many `null` values as required according to the list above. See the 'Empty array' + example for an example. + """ + return quantiles(data=self, probabilities=probabilities, q=q, ignore_nodata=ignore_nodata) + + @openeo_process + def rearrange(self, order) -> ProcessBuilder: + """ + Sort an array based on a permutation + + :param self: The array to rearrange. + :param order: The permutation used for rearranging. + + :return: The rearranged array. 
+ """ + return rearrange(data=self, order=order) + + @openeo_process + def reduce_dimension(self, reducer, dimension, context=UNSET) -> ProcessBuilder: + """ + Reduce dimensions + + :param self: A data cube. + :param reducer: A reducer to apply on the specified dimension. A reducer is a single process such as + ``mean()`` or a set of processes, which computes a single value for a list of values, see the category + 'reducer' for such processes. + :param dimension: The name of the dimension over which to reduce. Fails with a `DimensionNotAvailable` + exception if the specified dimension does not exist. + :param context: Additional data to be passed to the reducer. + + :return: A data cube with the newly computed values. It is missing the given dimension, the number of + dimensions decreases by one. The dimension properties (name, type, labels, reference system and + resolution) for all other dimensions remain unchanged. + """ + return reduce_dimension( + data=self, + reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), + dimension=dimension, + context=context + ) + + @openeo_process + def reduce_spatial(self, reducer, context=UNSET) -> ProcessBuilder: + """ + Reduce spatial dimensions 'x' and 'y' + + :param self: A raster data cube. + :param reducer: A reducer to apply on the horizontal spatial dimensions. A reducer is a single process + such as ``mean()`` or a set of processes, which computes a single value for a list of values, see the + category 'reducer' for such processes. + :param context: Additional data to be passed to the reducer. + + :return: A data cube with the newly computed values. It is missing the horizontal spatial dimensions, + the number of dimensions decreases by two. The dimension properties (name, type, labels, reference + system and resolution) for all other dimensions remain unchanged. 
+ """ + return reduce_spatial(data=self, reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), context=context) + + @openeo_process + def rename_dimension(self, source, target) -> ProcessBuilder: + """ + Rename a dimension + + :param self: The data cube. + :param source: The current name of the dimension. Fails with a `DimensionNotAvailable` exception if the + specified dimension does not exist. + :param target: A new Name for the dimension. Fails with a `DimensionExists` exception if a dimension + with the specified name exists. + + :return: A data cube with the same dimensions, but the name of one of the dimensions changes. The old + name can not be referred to any longer. The dimension properties (name, type, labels, reference system + and resolution) remain unchanged. + """ + return rename_dimension(data=self, source=source, target=target) + + @openeo_process + def rename_labels(self, dimension, target, source=UNSET) -> ProcessBuilder: + """ + Rename dimension labels + + :param self: The data cube. + :param dimension: The name of the dimension to rename the labels for. + :param target: The new names for the labels. If a target dimension label already exists in the data + cube, a `LabelExists` exception is thrown. + :param source: The original names of the labels to be renamed to corresponding array elements in the + parameter `target`. It is allowed to only specify a subset of labels to rename, as long as the `target` + and `source` parameter have the same length. The order of the labels doesn't need to match the order of + the dimension labels in the data cube. By default, the array is empty so that the dimension labels in + the data cube are expected to be enumerated. If the dimension labels are not enumerated and the given + array is empty, the `LabelsNotEnumerated` exception is thrown. If one of the source dimension labels + doesn't exist, the `LabelNotAvailable` exception is thrown. 
+ + :return: The data cube with the same dimensions. The dimension properties (name, type, labels, + reference system and resolution) remain unchanged, except that for the given dimension the labels + change. The old labels can not be referred to any longer. The number of labels remains the same. + """ + return rename_labels(data=self, dimension=dimension, target=target, source=source) + + @openeo_process + def resample_cube_spatial(self, target, method=UNSET) -> ProcessBuilder: + """ + Resample the spatial dimensions to match a target data cube + + :param self: A raster data cube. + :param target: A raster data cube that describes the spatial target resolution. + :param method: Resampling method to use. The following options are available and are meant to align + with [`gdalwarp`](https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r): * `average`: average + (mean) resampling, computes the weighted average of all valid pixels * `bilinear`: bilinear resampling + * `cubic`: cubic resampling * `cubicspline`: cubic spline resampling * `lanczos`: Lanczos windowed sinc + resampling * `max`: maximum resampling, selects the maximum value from all valid pixels * `med`: median + resampling, selects the median value of all valid pixels * `min`: minimum resampling, selects the + minimum value from all valid pixels * `mode`: mode resampling, selects the value which appears most + often of all the sampled points * `near`: nearest neighbour resampling (default) * `q1`: first quartile + resampling, selects the first quartile value of all valid pixels * `q3`: third quartile resampling, + selects the third quartile value of all valid pixels * `rms` root mean square (quadratic mean) of all + valid pixels * `sum`: compute the weighted sum of all valid pixels Valid pixels are determined based + on the function ``is_valid()``. + + :return: A raster data cube with the same dimensions. 
The dimension properties (name, type, labels, + reference system and resolution) remain unchanged, except for the resolution and dimension labels of + the spatial dimensions. + """ + return resample_cube_spatial(data=self, target=target, method=method) + + @openeo_process + def resample_cube_temporal(self, target, dimension=UNSET, valid_within=UNSET) -> ProcessBuilder: + """ + Resample temporal dimensions to match a target data cube + + :param self: A data cube with one or more temporal dimensions. + :param target: A data cube that describes the temporal target resolution. + :param dimension: The name of the temporal dimension to resample, which must exist with this name in + both data cubes. If the dimension is not set or is set to `null`, the process resamples all temporal + dimensions that exist with the same names in both data cubes. The following exceptions may occur: * A + dimension is given, but it does not exist in any of the data cubes: `DimensionNotAvailable` * A + dimension is given, but one of them is not temporal: `DimensionMismatch` * No specific dimension name + is given and there are no temporal dimensions with the same name in the data: `DimensionMismatch` + :param valid_within: Setting this parameter to a numerical value enables that the process searches for + valid values within the given period of days before and after the target timestamps. Valid values are + determined based on the function ``is_valid()``. For example, the limit of `7` for the target + timestamps `2020-01-15 12:00:00` looks for a nearest neighbor after `2020-01-08 12:00:00` and before + `2020-01-22 12:00:00`. If no valid value is found within the given period, the value will be set to no- + data (`null`). + + :return: A data cube with the same dimensions and the same dimension properties (name, type, labels, + reference system and resolution) for all non-temporal dimensions. 
For the temporal dimension, the name + and type remain unchanged, but the dimension labels, resolution and reference system may change. + """ + return resample_cube_temporal(data=self, target=target, dimension=dimension, valid_within=valid_within) + + @openeo_process + def resample_spatial(self, resolution=UNSET, projection=UNSET, method=UNSET, align=UNSET) -> ProcessBuilder: + """ + Resample and warp the spatial dimensions + + :param self: A raster data cube. + :param resolution: Resamples the data cube to the target resolution, which can be specified either as + separate values for x and y or as a single value for both axes. Specified in the units of the target + projection. Doesn't change the resolution by default (`0`). + :param projection: Warps the data cube to the target projection, specified as as [EPSG + code](http://www.epsg-registry.org/) or [WKT2 CRS + string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). By default (`null`), the projection + is not changed. + :param method: Resampling method to use. 
The following options are available and are meant to align + with [`gdalwarp`](https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r): * `average`: average + (mean) resampling, computes the weighted average of all valid pixels * `bilinear`: bilinear resampling + * `cubic`: cubic resampling * `cubicspline`: cubic spline resampling * `lanczos`: Lanczos windowed sinc + resampling * `max`: maximum resampling, selects the maximum value from all valid pixels * `med`: median + resampling, selects the median value of all valid pixels * `min`: minimum resampling, selects the + minimum value from all valid pixels * `mode`: mode resampling, selects the value which appears most + often of all the sampled points * `near`: nearest neighbour resampling (default) * `q1`: first quartile + resampling, selects the first quartile value of all valid pixels * `q3`: third quartile resampling, + selects the third quartile value of all valid pixels * `rms` root mean square (quadratic mean) of all + valid pixels * `sum`: compute the weighted sum of all valid pixels Valid pixels are determined based + on the function ``is_valid()``. + :param align: Specifies to which corner of the spatial extent the new resampled data is aligned to. + + :return: A raster data cube with values warped onto the new projection. It has the same dimensions and + the same dimension properties (name, type, labels, reference system and resolution) for all non-spatial + or vertical spatial dimensions. For the horizontal spatial dimensions the name and type remain + unchanged, but reference system, labels and resolution may change depending on the given parameters. + """ + return resample_spatial(data=self, resolution=resolution, projection=projection, method=method, align=align) + + @openeo_process + def round(self, p=UNSET) -> ProcessBuilder: + """ + Round to a specified precision + + :param self: A number to round. 
+ :param p: A positive number specifies the number of digits after the decimal point to round to. A + negative number means rounding to a power of ten, so for example *-2* rounds to the nearest hundred. + Defaults to *0*. + + :return: The rounded number. + """ + return round(x=self, p=p) + + @openeo_process + def run_udf(self, udf, runtime, version=UNSET, context=UNSET) -> ProcessBuilder: + """ + Run a UDF + + :param self: The data to be passed to the UDF. + :param udf: Either source code, an absolute URL or a path to a UDF script. + :param runtime: A UDF runtime identifier available at the back-end. + :param version: An UDF runtime version. If set to `null`, the default runtime version specified for + each runtime is used. + :param context: Additional data such as configuration options to be passed to the UDF. + + :return: The data processed by the UDF. The returned value can be of any data type and is exactly what + the UDF code returns. + """ + return run_udf(data=self, udf=udf, runtime=runtime, version=version, context=context) + + @openeo_process + def run_udf_externally(self, url, context=UNSET) -> ProcessBuilder: + """ + Run an externally hosted UDF container + + :param self: The data to be passed to the UDF. + :param url: Absolute URL to a remote UDF service. + :param context: Additional data such as configuration options to be passed to the UDF. + + :return: The data processed by the UDF. The returned value can in principle be of any data type, but it + depends on what is returned by the UDF code. Please see the implemented UDF interface for details. 
+ """ + return run_udf_externally(data=self, url=url, context=context) + + @openeo_process + def sar_backscatter(self, coefficient=UNSET, elevation_model=UNSET, mask=UNSET, contributing_area=UNSET, local_incidence_angle=UNSET, ellipsoid_incidence_angle=UNSET, noise_removal=UNSET, options=UNSET) -> ProcessBuilder: + """ + Computes backscatter from SAR input + + :param self: The source data cube containing SAR input. + :param coefficient: Select the radiometric correction coefficient. The following options are available: + * `beta0`: radar brightness * `sigma0-ellipsoid`: ground area computed with ellipsoid earth model * + `sigma0-terrain`: ground area computed with terrain earth model * `gamma0-ellipsoid`: ground area + computed with ellipsoid earth model in sensor line of sight * `gamma0-terrain`: ground area computed + with terrain earth model in sensor line of sight (default) * `null`: non-normalized backscatter + :param elevation_model: The digital elevation model to use. Set to `null` (the default) to allow the + back-end to choose, which will improve portability, but reduce reproducibility. + :param mask: If set to `true`, a data mask is added to the bands with the name `mask`. It indicates + which values are valid (1), invalid (0) or contain no-data (null). + :param contributing_area: If set to `true`, a DEM-based local contributing area band named + `contributing_area` is added. The values are given in square meters. + :param local_incidence_angle: If set to `true`, a DEM-based local incidence angle band named + `local_incidence_angle` is added. The values are given in degrees. + :param ellipsoid_incidence_angle: If set to `true`, an ellipsoidal incidence angle band named + `ellipsoid_incidence_angle` is added. The values are given in degrees. + :param noise_removal: If set to `false`, no noise removal is applied. Defaults to `true`, which removes + noise. + :param options: Proprietary options for the backscatter computations. 
Specifying proprietary options + will reduce portability. + + :return: Backscatter values corresponding to the chosen parametrization. The values are given in linear + scale. + """ + return sar_backscatter( + data=self, + coefficient=coefficient, + elevation_model=elevation_model, + mask=mask, + contributing_area=contributing_area, + local_incidence_angle=local_incidence_angle, + ellipsoid_incidence_angle=ellipsoid_incidence_angle, + noise_removal=noise_removal, + options=options + ) + + @openeo_process + def save_result(self, format, options=UNSET) -> ProcessBuilder: + """ + Save processed data + + :param self: The data to deliver in the given file format. + :param format: The file format to use. It must be one of the values that the server reports as + supported output file formats, which usually correspond to the short GDAL/OGR codes. This parameter is + *case insensitive*. * If the data cube is empty and the file format can't store empty data cubes, a + `DataCubeEmpty` exception is thrown. * If the file format is otherwise not suitable for storing the + underlying data structure, a `FormatUnsuitable` exception is thrown. + :param options: The file format parameters to be used to create the file(s). Must correspond to the + parameters that the server reports as supported parameters for the chosen `format`. The parameter names + and valid values usually correspond to the GDAL/OGR format options. + + :return: Always returns `true` as in case of an error an exception is thrown which aborts the execution + of the process. + """ + return save_result(data=self, format=format, options=options) + + @openeo_process + def sd(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Standard deviation + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if any value is such a + value. 
+ + :return: The computed sample standard deviation. + """ + return sd(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def sgn(self) -> ProcessBuilder: + """ + Signum + + :param self: A number. + + :return: The computed signum value of `x`. + """ + return sgn(x=self) + + @openeo_process + def sin(self) -> ProcessBuilder: + """ + Sine + + :param self: An angle in radians. + + :return: The computed sine of `x`. + """ + return sin(x=self) + + @openeo_process + def sinh(self) -> ProcessBuilder: + """ + Hyperbolic sine + + :param self: An angle in radians. + + :return: The computed hyperbolic sine of `x`. + """ + return sinh(x=self) + + @openeo_process + def sort(self, asc=UNSET, nodata=UNSET) -> ProcessBuilder: + """ + Sort data + + :param self: An array with data to sort. + :param asc: The default sort order is ascending, with smallest values first. To sort in reverse + (descending) order, set this parameter to `false`. + :param nodata: Controls the handling of no-data values (`null`). By default, they are removed. If set + to `true`, missing values in the data are put last; if set to `false`, they are put first. + + :return: The sorted array. + """ + return sort(data=self, asc=asc, nodata=nodata) + + @openeo_process + def sqrt(self) -> ProcessBuilder: + """ + Square root + + :param self: A number. + + :return: The computed square root. + """ + return sqrt(x=self) + + @openeo_process + def subtract(self, y) -> ProcessBuilder: + """ + Subtraction of two numbers + + :param self: The minuend. + :param y: The subtrahend. + + :return: The computed result. + """ + return subtract(x=self, y=y) + + @openeo_process + def sum(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Compute the sum by adding up numbers + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. 
+ Setting this flag to `false` considers no-data values so that `null` is returned if any value is such a + value. + + :return: The computed sum of the sequence of numbers. + """ + return sum(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def tan(self) -> ProcessBuilder: + """ + Tangent + + :param self: An angle in radians. + + :return: The computed tangent of `x`. + """ + return tan(x=self) + + @openeo_process + def tanh(self) -> ProcessBuilder: + """ + Hyperbolic tangent + + :param self: An angle in radians. + + :return: The computed hyperbolic tangent of `x`. + """ + return tanh(x=self) + + @openeo_process + def text_begins(self, pattern, case_sensitive=UNSET) -> ProcessBuilder: + """ + Text begins with another text + + :param self: Text in which to find something at the beginning. + :param pattern: Text to find at the beginning of `data`. Regular expressions are not supported. + :param case_sensitive: Case sensitive comparison can be disabled by setting this parameter to `false`. + + :return: `true` if `data` begins with `pattern`, false` otherwise. + """ + return text_begins(data=self, pattern=pattern, case_sensitive=case_sensitive) + + @openeo_process + def text_concat(self, separator=UNSET) -> ProcessBuilder: + """ + Concatenate elements to a single text + + :param self: A set of elements. Numbers, boolean values and null values get converted to their (lower + case) string representation. For example: `1` (integer), `-1.5` (number), `true` / `false` (boolean + values) + :param separator: A separator to put between each of the individual texts. Defaults to an empty string. + + :return: A string containing a string representation of all the array elements in the same order, with + the separator between each element. 
+ """ + return text_concat(data=self, separator=separator) + + @openeo_process + def text_contains(self, pattern, case_sensitive=UNSET) -> ProcessBuilder: + """ + Text contains another text + + :param self: Text in which to find something in. + :param pattern: Text to find in `data`. Regular expressions are not supported. + :param case_sensitive: Case sensitive comparison can be disabled by setting this parameter to `false`. + + :return: `true` if `data` contains the `pattern`, false` otherwise. + """ + return text_contains(data=self, pattern=pattern, case_sensitive=case_sensitive) + + @openeo_process + def text_ends(self, pattern, case_sensitive=UNSET) -> ProcessBuilder: + """ + Text ends with another text + + :param self: Text in which to find something at the end. + :param pattern: Text to find at the end of `data`. Regular expressions are not supported. + :param case_sensitive: Case sensitive comparison can be disabled by setting this parameter to `false`. + + :return: `true` if `data` ends with `pattern`, false` otherwise. + """ + return text_ends(data=self, pattern=pattern, case_sensitive=case_sensitive) + + @openeo_process + def trim_cube(self) -> ProcessBuilder: + """ + Remove dimension labels with no-data values + + :param self: A data cube to trim. + + :return: A trimmed data cube with the same dimensions. The dimension properties name, type, reference + system and resolution remain unchanged. The number of dimension labels may decrease. + """ + return trim_cube(data=self) + + @openeo_process + def unflatten_dimension(self, dimension, target_dimensions, label_separator=UNSET) -> ProcessBuilder: + """ + Split a single dimensions into multiple dimensions + + :param self: A data cube that is consistently structured so that operation can execute flawlessly (e.g. + the dimension labels need to contain the `label_separator` exactly 1 time for two target dimensions, 2 + times for three target dimensions etc.). 
+ :param dimension: The name of the dimension to split. + :param target_dimensions: The names of the new target dimensions. New dimensions will be created with + the given names and type `other` (see ``add_dimension()``). Fails with a `TargetDimensionExists` + exception if any of the dimensions exists. The order of the array defines the order in which the + dimensions and dimension labels are added to the data cube (see the example in the process + description). + :param label_separator: The string that will be used as a separator to split the dimension labels. + + :return: A data cube with the new shape. The dimension properties (name, type, labels, reference system + and resolution) for all other dimensions remain unchanged. + """ + return unflatten_dimension(data=self, dimension=dimension, target_dimensions=target_dimensions, label_separator=label_separator) + + @openeo_process + def variance(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Variance + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if any value is such a + value. + + :return: The computed sample variance. + """ + return variance(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def vector_buffer(self, distance) -> ProcessBuilder: + """ + Buffer geometries by distance + + :param self: Geometries to apply the buffer on. Feature properties are preserved. + :param distance: The distance of the buffer in meters. A positive distance expands the geometries, + resulting in outward buffering (dilation), while a negative distance shrinks the geometries, resulting + in inward buffering (erosion). If the unit of the spatial reference system is not meters, a + `UnitMismatch` error is thrown. Use ``vector_reproject()`` to convert the geometries to a suitable + spatial reference system. 
+ + :return: Returns a vector data cube with the computed new geometries of which some may be empty. + """ + return vector_buffer(geometries=self, distance=distance) + + @openeo_process + def vector_reproject(self, projection, dimension=UNSET) -> ProcessBuilder: + """ + Reprojects the geometry dimension + + :param self: A vector data cube. + :param projection: Coordinate reference system to reproject to. Specified as an [EPSG + code](http://www.epsg-registry.org/) or [WKT2 CRS + string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). + :param dimension: The name of the geometry dimension to reproject. If no specific dimension is + specified, the filter applies to all geometry dimensions. Fails with a `DimensionNotAvailable` + exception if the specified dimension does not exist. + + :return: A vector data cube with geometries projected to the new coordinate reference system. The + reference system of the geometry dimension changes, all other dimensions and properties remain + unchanged. + """ + return vector_reproject(data=self, projection=projection, dimension=dimension) + + @openeo_process + def vector_to_random_points(self, geometry_count=UNSET, total_count=UNSET, group=UNSET, seed=UNSET) -> ProcessBuilder: + """ + Sample random points from geometries + + :param self: Input geometries for sample extraction. + :param geometry_count: The maximum number of points to compute per geometry. Points in the input + geometries can be selected only once by the sampling. + :param total_count: The maximum number of points to compute overall. Throws a `CountMismatch` + exception if the specified value is less than the provided number of geometries. + :param group: Specifies whether the sampled points should be grouped by input geometry (default) or be + generated as independent points. * If the sampled points are grouped, the process generates a + `MultiPoint` per geometry given which keeps the original identifier if present. 
* Otherwise, each + sampled point is generated as a distinct `Point` geometry without identifier. + :param seed: A randomization seed to use for random sampling. If not given or `null`, no seed is used + and results may differ on subsequent use. + + :return: Returns a vector data cube with the sampled points. + """ + return vector_to_random_points(data=self, geometry_count=geometry_count, total_count=total_count, group=group, seed=seed) + + @openeo_process + def vector_to_regular_points(self, distance, group=UNSET) -> ProcessBuilder: + """ + Sample regular points from geometries + + :param self: Input geometries for sample extraction. + :param distance: Defines the minimum distance in meters that is required between two samples generated + *inside* a single geometry. If the unit of the spatial reference system is not meters, a `UnitMismatch` + error is thrown. Use ``vector_reproject()`` to convert the geometries to a suitable spatial reference + system. - For **polygons**, the distance defines the cell sizes of a regular grid that starts at the + upper-left bound of each polygon. The centroid of each cell is then a sample point. If the centroid is + not enclosed in the polygon, no point is sampled. If no point can be sampled for the geometry at all, + the first coordinate of the geometry is returned as point. - For **lines** (line strings), the sampling + starts with a point at the first coordinate of the line and then walks along the line and samples a new + point each time the distance to the previous point has been reached again. - For **points**, the point + is returned as given. + :param group: Specifies whether the sampled points should be grouped by input geometry (default) or be + generated as independent points. * If the sampled points are grouped, the process generates a + `MultiPoint` per geometry given which keeps the original identifier if present. * Otherwise, each + sampled point is generated as a distinct `Point` geometry without identifier. 
+ + :return: Returns a vector data cube with the sampled points. + """ + return vector_to_regular_points(data=self, distance=distance, group=group) + + @openeo_process + def xor(self, y) -> ProcessBuilder: + """ + Logical XOR (exclusive or) + + :param self: A boolean value. + :param y: A boolean value. + + :return: Boolean result of the logical XOR. + """ + return xor(x=self, y=y)
# Private shortcut, used by the wrapper functions in this module: it has a lower chance
# to collide with a process argument that is itself named `process`.
_process = ProcessBuilder.process
# Public shortcut to the same constructor.
process = ProcessBuilder.process
@openeo_process
def absolute(x) -> ProcessBuilder:
    """
    Absolute value

    :param x: A number.

    :return: The absolute value that was computed for `x`.
    """
    arguments = {"x": x}
    return _process("absolute", **arguments)
+ + + +
@openeo_process
def add(x, y) -> ProcessBuilder:
    """
    Addition of two numbers

    :param x: First summand.
    :param y: Second summand.

    :return: The sum of the two numbers.
    """
    arguments = {"x": x, "y": y}
    return _process("add", **arguments)
+ + + +
@openeo_process
def add_dimension(data, name, label, type=UNSET) -> ProcessBuilder:
    """
    Add a new dimension

    :param data: The data cube that the dimension is added to.
    :param name: The name of the new dimension.
    :param label: A dimension label.
    :param type: The type of the dimension; `other` is used by default.

    :return: The data cube extended with the new dimension, which has exactly one dimension label. All
        other dimensions remain unchanged.
    """
    arguments = {
        "data": data,
        "name": name,
        "label": label,
        "type": type,
    }
    return _process("add_dimension", **arguments)
+ + + +
@openeo_process
def aggregate_spatial(data, geometries, reducer, target_dimension=UNSET, context=UNSET) -> ProcessBuilder:
    """
    Zonal statistics for geometries

    :param data: A raster data cube with at least two spatial dimensions. The data cube is implicitly
        restricted to the bounds of the geometries, as if ``filter_spatial()`` had been used with the same
        values for the corresponding parameters immediately before this process.
    :param geometries: Geometries for which the aggregation will be computed. Feature properties are
        preserved for vector data cubes and all GeoJSON Features. One value will be computed per label in
        the dimension of type `geometries`, GeoJSON `Feature` or `Geometry`. For a `FeatureCollection`
        multiple values will be computed, one value per contained `Feature`. No values will be computed for
        empty geometries. For example, a single value will be computed for a `MultiPolygon`, but two values
        will be computed for a `FeatureCollection` containing two polygons.

        - For **polygons**, the process considers all pixels for which the point at the pixel center
          intersects with the corresponding polygon (as defined in the Simple Features standard by the
          OGC).
        - For **points**, the process considers the closest pixel center.
        - For **lines** (line strings), the process considers all the pixels whose centers are closest to
          at least one point on the line.

        Thus, pixels may be part of multiple geometries and be part of multiple aggregations. No operation
        is applied to geometries that are outside of the bounds of the data.
    :param reducer: A reducer to be applied on all values of each geometry. A reducer is a single process
        such as ``mean()`` or a set of processes, which computes a single value for a list of values, see
        the category 'reducer' for such processes.
    :param target_dimension: By default (which is `null`), the process only computes the results and
        doesn't add a new dimension. If this parameter contains a new dimension name, the computation also
        stores information about the total count of pixels (valid + invalid pixels) and the number of valid
        pixels (see ``is_valid()``) for each computed value. These values are added as a new dimension. The
        new dimension of type `other` has the dimension labels `value`, `total_count` and `valid_count`.
        Fails with a `TargetDimensionExists` exception if a dimension with the specified name exists.
    :param context: Additional data to be passed to the reducer.

    :return: A vector data cube with the computed results. Empty geometries still exist but without any
        aggregated values (i.e. no-data). The spatial dimensions are replaced by a dimension of type
        'geometries' and if `target_dimension` is not `null`, a new dimension is added.
    """
    arguments = {
        "data": data,
        "geometries": geometries,
        # The reducer callback is handed the parent parameters `data` and `context`.
        "reducer": build_child_callback(reducer, parent_parameters=["data", "context"]),
        "target_dimension": target_dimension,
        "context": context,
    }
    return _process("aggregate_spatial", **arguments)
+ + + +
@openeo_process
def aggregate_spatial_window(data, reducer, size, boundary=UNSET, align=UNSET, context=UNSET) -> ProcessBuilder:
    """
    Zonal statistics for rectangular windows

    :param data: A raster data cube with exactly two horizontal spatial dimensions and an arbitrary number
        of additional dimensions. The process is applied to all additional dimensions individually.
    :param reducer: A reducer to be applied on the list of values, which contain all pixels covered by the
        window. A reducer is a single process such as ``mean()`` or a set of processes, which computes a
        single value for a list of values, see the category 'reducer' for such processes.
    :param size: Window size in pixels along the horizontal spatial dimensions. The first value
        corresponds to the `x` axis, the second value corresponds to the `y` axis.
    :param boundary: Behavior to apply if the number of values for the axes `x` and `y` is not a multiple
        of the corresponding value in the `size` parameter. Options are:

        - `pad` (default): pad the data cube with the no-data value `null` to fit the required window
          size.
        - `trim`: trim the data cube to fit the required window size.

        Use the parameter `align` to specify to which corner the data is aligned.
    :param align: If the data requires padding or trimming (see parameter `boundary`), specifies to which
        corner of the spatial extent the data is aligned. For example, if the data is aligned to the upper
        left, the process pads/trims at the lower-right.
    :param context: Additional data to be passed to the reducer.

    :return: A raster data cube with the newly computed values and the same dimensions. The resolution
        will change depending on the chosen values for the `size` and `boundary` parameter. It usually
        decreases for the dimensions which have the corresponding parameter `size` set to values greater
        than 1. The dimension labels will be set to the coordinate at the center of the window. The other
        dimension properties (name, type and reference system) remain unchanged.
    """
    arguments = {
        "data": data,
        # The reducer callback is handed the parent parameters `data` and `context`.
        "reducer": build_child_callback(reducer, parent_parameters=["data", "context"]),
        "size": size,
        "boundary": boundary,
        "align": align,
        "context": context,
    }
    return _process("aggregate_spatial_window", **arguments)
+ + + +
@openeo_process
def aggregate_temporal(data, intervals, reducer, labels=UNSET, dimension=UNSET, context=UNSET) -> ProcessBuilder:
    """
    Temporal aggregations

    :param data: A data cube.
    :param intervals: Left-closed temporal intervals, which are allowed to overlap. Each temporal interval
        in the array has exactly two elements:

        1. The first element is the start of the temporal interval. The specified time instant is
           **included** in the interval.
        2. The second element is the end of the temporal interval. The specified time instant is
           **excluded** from the interval.

        The second element must always be greater/later than the first element, except when using time
        without date. Otherwise, a `TemporalExtentEmpty` exception is thrown.
    :param reducer: A reducer to be applied for the values contained in each interval. A reducer is a
        single process such as ``mean()`` or a set of processes, which computes a single value for a list
        of values, see the category 'reducer' for such processes. Intervals may not contain any values,
        which for most reducers leads to no-data (`null`) values by default.
    :param labels: Distinct labels for the intervals, which can contain dates and/or times. Is only
        required to be specified if the values for the start of the temporal intervals are not distinct and
        thus the default labels would not be unique. The number of labels and the number of groups need to
        be equal.
    :param dimension: The name of the temporal dimension for aggregation. All data along the dimension is
        passed through the specified reducer. If the dimension is not set or set to `null`, the data cube
        is expected to only have one temporal dimension. Fails with a `TooManyDimensions` exception if it
        has more dimensions. Fails with a `DimensionNotAvailable` exception if the specified dimension does
        not exist.
    :param context: Additional data to be passed to the reducer.

    :return: A new data cube with the same dimensions. The dimension properties (name, type, labels,
        reference system and resolution) remain unchanged, except for the resolution and dimension labels
        of the given temporal dimension.
    """
    arguments = {
        "data": data,
        "intervals": intervals,
        # The reducer callback is handed the parent parameters `data` and `context`.
        "reducer": build_child_callback(reducer, parent_parameters=["data", "context"]),
        "labels": labels,
        "dimension": dimension,
        "context": context,
    }
    return _process("aggregate_temporal", **arguments)
+ + + +
@openeo_process
def aggregate_temporal_period(data, period, reducer, dimension=UNSET, context=UNSET) -> ProcessBuilder:
    """
    Temporal aggregations based on calendar hierarchies

    :param data: The source data cube.
    :param period: The time intervals to aggregate. The following pre-defined values are available:

        * `hour`: Hour of the day
        * `day`: Day of the year
        * `week`: Week of the year
        * `dekad`: Ten day periods, counted per year with three periods per month (day 1 - 10, 11 - 20 and
          21 - end of month). The third dekad of the month can range from 8 to 11 days. For example, the
          third dekad of a year spans from January 21 till January 31 (11 days), the fourth dekad spans
          from February 1 till February 10 (10 days) and the sixth dekad spans from February 21 till
          February 28 or February 29 in a leap year (8 or 9 days respectively).
        * `month`: Month of the year
        * `season`: Three month periods of the calendar seasons (December - February, March - May, June -
          August, September - November).
        * `tropical-season`: Six month periods of the tropical seasons (November - April, May - October).
        * `year`: Proleptic years
        * `decade`: Ten year periods ([0-to-9 decade](https://en.wikipedia.org/wiki/Decade#0-to-9_decade)),
          from a year ending in a 0 to the next year ending in a 9.
        * `decade-ad`: Ten year periods ([1-to-0
          decade](https://en.wikipedia.org/wiki/Decade#1-to-0_decade)) better aligned with the anno Domini
          (AD) calendar era, from a year ending in a 1 to the next year ending in a 0.
    :param reducer: A reducer to be applied for the values contained in each period. A reducer is a single
        process such as ``mean()`` or a set of processes, which computes a single value for a list of
        values, see the category 'reducer' for such processes. Periods may not contain any values, which
        for most reducers leads to no-data (`null`) values by default.
    :param dimension: The name of the temporal dimension for aggregation. All data along the dimension is
        passed through the specified reducer. If the dimension is not set or set to `null`, the source data
        cube is expected to only have one temporal dimension. Fails with a `TooManyDimensions` exception if
        it has more dimensions. Fails with a `DimensionNotAvailable` exception if the specified dimension
        does not exist.
    :param context: Additional data to be passed to the reducer.

    :return: A new data cube with the same dimensions. The dimension properties (name, type, labels,
        reference system and resolution) remain unchanged, except for the resolution and dimension labels
        of the given temporal dimension. The specified temporal dimension has the following dimension
        labels (`YYYY` = four-digit year, `MM` = two-digit month, `DD` = two-digit day of month):

        * `hour`: `YYYY-MM-DD-00` - `YYYY-MM-DD-23`
        * `day`: `YYYY-001` - `YYYY-365`
        * `week`: `YYYY-01` - `YYYY-52`
        * `dekad`: `YYYY-00` - `YYYY-36`
        * `month`: `YYYY-01` - `YYYY-12`
        * `season`: `YYYY-djf` (December - February), `YYYY-mam` (March - May), `YYYY-jja` (June - August),
          `YYYY-son` (September - November).
        * `tropical-season`: `YYYY-ndjfma` (November - April), `YYYY-mjjaso` (May - October).
        * `year`: `YYYY`
        * `decade`: `YYY0`
        * `decade-ad`: `YYY1`

        The dimension labels in the new data cube are complete for the whole extent of the source data
        cube. For example, if `period` is set to `day` and the source data cube has two dimension labels at
        the beginning of the year (`2020-01-01`) and the end of a year (`2020-12-31`), the process returns
        a data cube with 365 dimension labels (`2020-001`, `2020-002`, ..., `2020-365`). In contrast, if
        `period` is set to `day` and the source data cube has just one dimension label `2020-01-05`, the
        process returns a data cube with just a single dimension label (`2020-005`).
    """
    arguments = {
        "data": data,
        "period": period,
        # The reducer callback is handed the parent parameters `data` and `context`.
        "reducer": build_child_callback(reducer, parent_parameters=["data", "context"]),
        "dimension": dimension,
        "context": context,
    }
    return _process("aggregate_temporal_period", **arguments)
+ + + +
@openeo_process
def all(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Are all of the values true?

    :param data: A set of boolean values.
    :param ignore_nodata: Indicates whether no-data values are ignored or not; they are ignored by
        default.

    :return: Boolean result of the logical operation.
    """
    arguments = {"data": data, "ignore_nodata": ignore_nodata}
    return _process("all", **arguments)
+ + + +
@openeo_process
def and_(x, y) -> ProcessBuilder:
    """
    Logical AND

    :param x: A boolean value.
    :param y: A boolean value.

    :return: Boolean result of the logical AND.
    """
    # The function carries a trailing underscore, but the process id passed on is plain 'and'.
    arguments = {"x": x, "y": y}
    return _process("and", **arguments)
+ + + +
@openeo_process
def anomaly(data, normals, period) -> ProcessBuilder:
    """
    Compute anomalies

    :param data: A data cube with exactly one temporal dimension and the following dimension labels for
        the given period (`YYYY` = four-digit year, `MM` = two-digit month, `DD` = two-digit day of month):

        * `hour`: `YYYY-MM-DD-00` - `YYYY-MM-DD-23`
        * `day`: `YYYY-001` - `YYYY-365`
        * `week`: `YYYY-01` - `YYYY-52`
        * `dekad`: `YYYY-00` - `YYYY-36`
        * `month`: `YYYY-01` - `YYYY-12`
        * `season`: `YYYY-djf` (December - February), `YYYY-mam` (March - May), `YYYY-jja` (June - August),
          `YYYY-son` (September - November).
        * `tropical-season`: `YYYY-ndjfma` (November - April), `YYYY-mjjaso` (May - October).
        * `year`: `YYYY`
        * `decade`: `YYY0`
        * `decade-ad`: `YYY1`
        * `single-period` / `climatology-period`: Any

        ``aggregate_temporal_period()`` can compute such a data cube.
    :param normals: A data cube with normals, e.g. daily, monthly or yearly values computed from a process
        such as ``climatological_normal()``. Must contain exactly one temporal dimension with the following
        dimension labels for the given period:

        * `hour`: `00` - `23`
        * `day`: `001` - `365`
        * `week`: `01` - `52`
        * `dekad`: `00` - `36`
        * `month`: `01` - `12`
        * `season`: `djf` (December - February), `mam` (March - May), `jja` (June - August), `son`
          (September - November)
        * `tropical-season`: `ndjfma` (November - April), `mjjaso` (May - October)
        * `year`: Four-digit year numbers
        * `decade`: Four-digit year numbers, the last digit being a `0`
        * `decade-ad`: Four-digit year numbers, the last digit being a `1`
        * `single-period` / `climatology-period`: A single dimension label with any name is expected.
    :param period: Specifies the time intervals available in the normals data cube. The following options
        are available:

        * `hour`: Hour of the day
        * `day`: Day of the year
        * `week`: Week of the year
        * `dekad`: Ten day periods, counted per year with three periods per month (day 1 - 10, 11 - 20 and
          21 - end of month). The third dekad of the month can range from 8 to 11 days. For example, the
          fourth dekad is Feb, 1 - Feb, 10 each year.
        * `month`: Month of the year
        * `season`: Three month periods of the calendar seasons (December - February, March - May, June -
          August, September - November).
        * `tropical-season`: Six month periods of the tropical seasons (November - April, May - October).
        * `year`: Proleptic years
        * `decade`: Ten year periods ([0-to-9 decade](https://en.wikipedia.org/wiki/Decade#0-to-9_decade)),
          from a year ending in a 0 to the next year ending in a 9.
        * `decade-ad`: Ten year periods ([1-to-0
          decade](https://en.wikipedia.org/wiki/Decade#1-to-0_decade)) better aligned with the anno Domini
          (AD) calendar era, from a year ending in a 1 to the next year ending in a 0.
        * `single-period` / `climatology-period`: A single period of arbitrary length

    :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference
        system and resolution) remain unchanged.
    """
    arguments = {
        "data": data,
        "normals": normals,
        "period": period,
    }
    return _process("anomaly", **arguments)
+ + + +
@openeo_process
def any(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Is at least one value true?

    :param data: A set of boolean values.
    :param ignore_nodata: Indicates whether no-data values are ignored or not; they are ignored by
        default.

    :return: Boolean result of the logical operation.
    """
    arguments = {"data": data, "ignore_nodata": ignore_nodata}
    return _process("any", **arguments)
+ + + +
@openeo_process
def apply(data, process, context=UNSET) -> ProcessBuilder:
    """
    Apply a process to each value

    :param data: A data cube.
    :param process: A process that accepts and returns a single value and is applied on each individual
        value in the data cube. The process may consist of multiple sub-processes and could, for example,
        consist of processes such as ``absolute()`` or ``linear_scale_range()``.
    :param context: Additional data to be passed to the process.

    :return: A data cube with the newly computed values and the same dimensions. The dimension properties
        (name, type, labels, reference system and resolution) remain unchanged.
    """
    # Unlike the array-based callbacks in this module, this one is handed `x` (not `data`) plus `context`.
    child_process = build_child_callback(process, parent_parameters=["x", "context"])
    return _process("apply", data=data, process=child_process, context=context)
+ + + +
@openeo_process
def apply_dimension(data, process, dimension, target_dimension=UNSET, context=UNSET) -> ProcessBuilder:
    """
    Apply a process to all values along a dimension

    :param data: A data cube.
    :param process: Process to be applied on all values along the given dimension. The specified process
        needs to accept an array and must return an array with at least one element. A process may consist
        of multiple sub-processes.
    :param dimension: The name of the source dimension to apply the process on. Fails with a
        `DimensionNotAvailable` exception if the specified dimension does not exist.
    :param target_dimension: The name of the target dimension or `null` (the default) to use the source
        dimension specified in the parameter `dimension`. By specifying a target dimension, the source
        dimension is removed. The target dimension with the specified name and the type `other` (see
        ``add_dimension()``) is created, if it doesn't exist yet.
    :param context: Additional data to be passed to the process.

    :return: A data cube with the newly computed values. All dimensions stay the same, except for the
        dimensions specified in corresponding parameters. There are three cases how the dimensions can
        change:

        1. The source dimension is the target dimension:

           - The (number of) dimensions remain unchanged as the source dimension is the target dimension.
           - The source dimension properties name and type remain unchanged.
           - The dimension labels, the reference system and the resolution are preserved only if the
             number of values in the source dimension is equal to the number of values computed by the
             process. Otherwise, all other dimension properties change as defined in the list below.

        2. The source dimension is not the target dimension. The target dimension exists with a single
           label only:

           - The number of dimensions decreases by one as the source dimension is 'dropped' and the target
             dimension is filled with the processed data that originates from the source dimension.
           - The target dimension properties name and type remain unchanged. All other dimension
             properties change as defined in the list below.

        3. The source dimension is not the target dimension and the latter does not exist:

           - The number of dimensions remain unchanged, but the source dimension is replaced with the
             target dimension.
           - The target dimension has the specified name and the type other. All other dimension
             properties are set as defined in the list below.

        Unless otherwise stated above, for the given (target) dimension the following applies:

        - the number of dimension labels is equal to the number of values computed by the process,
        - the dimension labels are incrementing integers starting from zero,
        - the resolution changes, and
        - the reference system is undefined.
    """
    arguments = {
        "data": data,
        # The child callback is handed the parent parameters `data` and `context`.
        "process": build_child_callback(process, parent_parameters=["data", "context"]),
        "dimension": dimension,
        "target_dimension": target_dimension,
        "context": context,
    }
    return _process("apply_dimension", **arguments)
+ + + +
@openeo_process
def apply_kernel(data, kernel, factor=UNSET, border=UNSET, replace_invalid=UNSET) -> ProcessBuilder:
    """
    Apply a spatial convolution with a kernel

    :param data: A raster data cube.
    :param kernel: Kernel as a two-dimensional array of weights. The inner level of the nested array
        aligns with the `x` axis and the outer level aligns with the `y` axis. Each level of the kernel
        must have an uneven number of elements, otherwise the process throws a `KernelDimensionsUneven`
        exception.
    :param factor: A factor that is multiplied to each value after the kernel has been applied. This is
        basically a shortcut for explicitly multiplying each value by a factor afterwards, which is often
        required for some kernel-based algorithms such as the Gaussian blur.
    :param border: Determines how the data is extended when the kernel overlaps with the borders. Defaults
        to fill the border with zeroes. The following options are available:

        * *numeric value* - fill with a user-defined constant number `n`: `nnnnnn|abcdefgh|nnnnnn`
          (default, with `n` = 0)
        * `replicate` - repeat the value from the pixel at the border: `aaaaaa|abcdefgh|hhhhhh`
        * `reflect` - mirror/reflect from the border: `fedcba|abcdefgh|hgfedc`
        * `reflect_pixel` - mirror/reflect from the center of the pixel at the border:
          `gfedcb|abcdefgh|gfedcb`
        * `wrap` - repeat/wrap the image: `cdefgh|abcdefgh|abcdef`
    :param replace_invalid: This parameter specifies the value to replace non-numerical or infinite
        numerical values with. By default, those values are replaced with zeroes.

    :return: A data cube with the newly computed values and the same dimensions. The dimension properties
        (name, type, labels, reference system and resolution) remain unchanged.
    """
    arguments = {
        "data": data,
        "kernel": kernel,
        "factor": factor,
        "border": border,
        "replace_invalid": replace_invalid,
    }
    return _process("apply_kernel", **arguments)
+ + + +
@openeo_process
def apply_neighborhood(data, process, size, overlap=UNSET, context=UNSET) -> ProcessBuilder:
    """
    Apply a process to pixels in a n-dimensional neighborhood

    :param data: A raster data cube.
    :param process: Process to be applied on all neighborhoods.
    :param size: Neighborhood sizes along each dimension. This object maps dimension names to either a
        physical measure (e.g. 100 m, 10 days) or pixels (e.g. 32 pixels). For dimensions not specified,
        the default is to provide all values. Be aware that including all values from overly large
        dimensions may not be processed at once.
    :param overlap: Overlap of neighborhoods along each dimension to avoid border effects. By default no
        overlap is provided. For instance a temporal dimension can add 1 month before and after a
        neighborhood. In the spatial dimensions, this is often a number of pixels. The overlap specified is
        added before and after, so an overlap of 8 pixels will add 8 pixels on both sides of the window, so
        16 in total. Be aware that large overlaps increase the need for computational resources and
        modifying overlapping data in subsequent operations have no effect.
    :param context: Additional data to be passed to the process.

    :return: A raster data cube with the newly computed values and the same dimensions. The dimension
        properties (name, type, labels, reference system and resolution) remain unchanged.
    """
    arguments = {
        "data": data,
        # The child callback is handed the parent parameters `data` and `context`.
        "process": build_child_callback(process, parent_parameters=["data", "context"]),
        "size": size,
        "overlap": overlap,
        "context": context,
    }
    return _process("apply_neighborhood", **arguments)
+ + + +
@openeo_process
def apply_polygon(data, polygons, process, mask_value=UNSET, context=UNSET) -> ProcessBuilder:
    """
    Run a process on segments of the data cube.

    :param data: A data cube.
    :param polygons: A vector data cube that contains at least one polygon. The vector data can be given
        as one of the following:

        * a `Polygon` or `MultiPolygon` geometry,
        * a `Feature` with a `Polygon` or `MultiPolygon` geometry, or
        * a `FeatureCollection` containing at least one `Feature` with `Polygon` or `MultiPolygon`
          geometries.

        Empty geometries are ignored.
    :param process: A process that takes and returns a single data cube; it is applied to each individual
        sub data cube. The process may consist of multiple sub-processes.
    :param mask_value: Value that replaces every pixel whose center point **does not** intersect with the
        polygon. Defaults to `null` (no data). It allows distinguishing no data values inside the polygon
        from masked pixels outside of it.
    :param context: Additional data to be passed to the process.

    :return: A data cube holding the newly computed values, with the same dimensions. The dimension
        properties (name, type, labels, reference system and resolution) remain unchanged.
    """
    child_process = build_child_callback(process, parent_parameters=["data", "context"])
    arguments = {
        "data": data,
        "polygons": polygons,
        "process": child_process,
        "mask_value": mask_value,
        "context": context,
    }
    return _process("apply_polygon", **arguments)
+ + + +
@openeo_process
def arccos(x) -> ProcessBuilder:
    """
    Compute the inverse cosine.

    :param x: A number.

    :return: The computed angle in radians.
    """
    arguments = {"x": x}
    return _process("arccos", **arguments)
+ + + +
@openeo_process
def arcosh(x) -> ProcessBuilder:
    """
    Compute the inverse hyperbolic cosine.

    :param x: A number.

    :return: The computed angle in radians.
    """
    arguments = {"x": x}
    return _process("arcosh", **arguments)
+ + + +
@openeo_process
def arcsin(x) -> ProcessBuilder:
    """
    Compute the inverse sine.

    :param x: A number.

    :return: The computed angle in radians.
    """
    arguments = {"x": x}
    return _process("arcsin", **arguments)
+ + + +
@openeo_process
def arctan(x) -> ProcessBuilder:
    """
    Compute the inverse tangent.

    :param x: A number.

    :return: The computed angle in radians.
    """
    arguments = {"x": x}
    return _process("arctan", **arguments)
+ + + +
@openeo_process
def arctan2(y, x) -> ProcessBuilder:
    """
    Compute the inverse tangent of two numbers.

    :param y: The number used as the dividend.
    :param x: The number used as the divisor.

    :return: The computed angle in radians.
    """
    arguments = {"y": y, "x": x}
    return _process("arctan2", **arguments)
+ + + +
@openeo_process
def ard_normalized_radar_backscatter(data, elevation_model=UNSET, contributing_area=UNSET, ellipsoid_incidence_angle=UNSET, noise_removal=UNSET, options=UNSET) -> ProcessBuilder:
    """
    Generate CARD4L compliant SAR NRB (normalized radar backscatter).

    :param data: The source data cube containing SAR input.
    :param elevation_model: The digital elevation model to use. With `null` (the default) the back-end
        chooses, which improves portability but reduces reproducibility.
    :param contributing_area: When set to `true`, a DEM-based local contributing area band named
        `contributing_area` is added. Its values are given in square meters.
    :param ellipsoid_incidence_angle: When set to `true`, an ellipsoidal incidence angle band named
        `ellipsoid_incidence_angle` is added. Its values are given in degrees.
    :param noise_removal: When set to `false`, no noise removal is applied. Defaults to `true`, which
        removes noise.
    :param options: Proprietary options for the backscatter computations. Specifying proprietary options
        reduces portability.

    :return: Backscatter values expressed as gamma0 in linear scale. Besides the bands `contributing_area`
        and `ellipsoid_incidence_angle`, which can optionally be added through the corresponding
        parameters, the following bands are always added to the data cube:

        - `mask`: A data mask that indicates which values are valid (1), invalid (0) or contain no-data
          (null).
        - `local_incidence_angle`: A band with DEM-based local incidence angles in degrees.

        The data returned is CARD4L compliant with corresponding metadata.
    """
    arguments = {
        "data": data,
        "elevation_model": elevation_model,
        "contributing_area": contributing_area,
        "ellipsoid_incidence_angle": ellipsoid_incidence_angle,
        "noise_removal": noise_removal,
        "options": options,
    }
    return _process("ard_normalized_radar_backscatter", **arguments)
+ + + +
@openeo_process
def ard_surface_reflectance(data, atmospheric_correction_method, cloud_detection_method, elevation_model=UNSET, atmospheric_correction_options=UNSET, cloud_detection_options=UNSET) -> ProcessBuilder:
    """
    CARD4L compliant Surface Reflectance generation

    :param data: The source data cube containing multi-spectral optical top of the atmosphere (TOA)
        reflectances. There must be a single dimension of type `bands` available.
    :param atmospheric_correction_method: The atmospheric correction method to use.
    :param cloud_detection_method: The cloud detection method to use. Each method supports detecting
        different atmospheric disturbances such as clouds, cloud shadows, aerosols, haze, ozone and/or
        water vapour in optical imagery.
    :param elevation_model: The digital elevation model to use. Set to `null` (the default) to allow the
        back-end to choose, which will improve portability, but reduce reproducibility.
    :param atmospheric_correction_options: Proprietary options for the atmospheric correction method.
        Specifying proprietary options will reduce portability.
    :param cloud_detection_options: Proprietary options for the cloud detection method. Specifying
        proprietary options will reduce portability.

    :return: Data cube containing bottom of atmosphere reflectances for each spectral band in the source
        data cube, with atmospheric disturbances like clouds and cloud shadows removed. No-data values
        (null) are directly set in the bands. Depending on the methods used, several additional bands will
        be added to the data cube:

        - `date` (optional): Specifies per-pixel acquisition timestamps.
        - `incomplete-testing` (required): Identifies pixels with a value of 1 for which the per-pixel
          tests (at least saturation, cloud and cloud shadows, see CARD4L specification for details) have
          not all been successfully completed. Otherwise, the value is 0.
        - `saturation` (required) / `saturation_{band}` (optional): Indicates where pixels in the input
          spectral bands are saturated (1) or not (0). If the saturation is given per band, the band names
          are `saturation_{band}` with `{band}` being the band name from the source data cube.
        - `cloud`, `shadow` (both required), `aerosol`, `haze`, `ozone`, `water_vapor` (all optional):
          Indicates the probability of pixels being an atmospheric disturbance such as clouds. All bands
          have values between 0 (clear) and 1, which describes the probability that it is an atmospheric
          disturbance.
        - `snow-ice` (optional): Points to a file that indicates whether a pixel is assessed as being
          snow/ice (1) or not (0). All values describe the probability and must be between 0 and 1.
        - `land-water` (optional): Indicates whether a pixel is assessed as being land (1) or water (0).
          All values describe the probability and must be between 0 and 1.
        - `incidence-angle` (optional): Specifies per-pixel incidence angles in degrees.
        - `azimuth` (optional): Specifies per-pixel azimuth angles in degrees.
        - `sun-azimuth` (optional): Specifies per-pixel sun azimuth angles in degrees.
        - `sun-elevation` (optional): Specifies per-pixel sun elevation angles in degrees.
        - `terrain-shadow` (optional): Indicates with a value of 1 whether a pixel is not directly
          illuminated due to terrain shadowing. Otherwise, the value is 0.
        - `terrain-occlusion` (optional): Indicates with a value of 1 whether a pixel is not visible to
          the sensor due to terrain occlusion during off-nadir viewing. Otherwise, the value is 0.
        - `terrain-illumination` (optional): Contains the coefficients used for terrain illumination
          correction, provided for each pixel.

        The data returned is CARD4L compliant with corresponding metadata.
    """
    return _process(
        "ard_surface_reflectance",
        data=data,
        atmospheric_correction_method=atmospheric_correction_method,
        cloud_detection_method=cloud_detection_method,
        elevation_model=elevation_model,
        atmospheric_correction_options=atmospheric_correction_options,
        cloud_detection_options=cloud_detection_options,
    )
+ + + +
@openeo_process
def array_append(data, value, label=UNSET) -> ProcessBuilder:
    """
    Add a value to the end of an array.

    :param data: An array.
    :param value: The value to append to the array.
    :param label: For a labeled array, a new label for the new value should be given. When not given or
        `null`, the array index as string is used as the label. If the label exists in any case, a
        `LabelExists` exception is thrown.

    :return: The new array with the value being appended.
    """
    arguments = {"data": data, "value": value, "label": label}
    return _process("array_append", **arguments)
+ + + +
@openeo_process
def array_apply(data, process, context=UNSET) -> ProcessBuilder:
    """
    Run a process on each element of an array.

    :param data: An array.
    :param process: A process that takes and returns a single value; it is applied to each individual
        value in the array. The process may consist of multiple sub-processes and could, for example,
        consist of processes such as ``absolute()`` or ``linear_scale_range()``.
    :param context: Additional data to be passed to the process.

    :return: An array holding the newly computed values. It has the same number of elements as the
        original array.
    """
    child_process = build_child_callback(process, parent_parameters=["x", "index", "label", "context"])
    arguments = {
        "data": data,
        "process": child_process,
        "context": context,
    }
    return _process("array_apply", **arguments)
+ + + +
@openeo_process
def array_concat(array1, array2) -> ProcessBuilder:
    """
    Concatenate two arrays into one.

    :param array1: The first array.
    :param array2: The second array.

    :return: The merged array.
    """
    arguments = {"array1": array1, "array2": array2}
    return _process("array_concat", **arguments)
+ + + +
@openeo_process
def array_contains(data, value) -> ProcessBuilder:
    """
    Check whether the array contains a given value

    :param data: List to find the value in.
    :param value: Value to find in `data`. If the value is `null`, this process always returns `false`.

    :return: `true` if the list contains the value, `false` otherwise.
    """
    return _process("array_contains", data=data, value=value)
+ + + +
@openeo_process
def array_create(data=UNSET, repeat=UNSET) -> ProcessBuilder:
    """
    Build a new array.

    :param data: A (native) array used to fill the newly created array. Defaults to an empty array.
    :param repeat: How many times the (native) array given in `data` is added, one after the other, to the
        new array being created. Defaults to `1`.

    :return: The newly created array.
    """
    arguments = {"data": data, "repeat": repeat}
    return _process("array_create", **arguments)
+ + + +
@openeo_process
def array_create_labeled(data, labels) -> ProcessBuilder:
    """
    Build a new labeled array.

    :param data: The array of values to use.
    :param labels: The array of labels to use.

    :return: The newly created labeled array.
    """
    arguments = {"data": data, "labels": labels}
    return _process("array_create_labeled", **arguments)
+ + + +
@openeo_process
def array_element(data, index=UNSET, label=UNSET, return_nodata=UNSET) -> ProcessBuilder:
    """
    Retrieve a single element from an array.

    :param data: An array.
    :param index: Zero-based index of the element to retrieve.
    :param label: Label of the element to retrieve. An `ArrayNotLabeled` exception is thrown if this
        parameter is set and the given array is not a labeled array.
    :param return_nodata: The process throws an `ArrayElementNotAvailable` exception by default when the
        index or label is invalid. Set this flag to `true` to get `null` returned instead.

    :return: The value of the requested element.
    """
    arguments = {
        "data": data,
        "index": index,
        "label": label,
        "return_nodata": return_nodata,
    }
    return _process("array_element", **arguments)
+ + + +
@openeo_process
def array_filter(data, condition, context=UNSET) -> ProcessBuilder:
    """
    Keep only the array elements that satisfy a condition.

    :param data: An array.
    :param condition: The condition, evaluated against each value, index and/or label in the array. Only
        array elements for which it returns `true` are preserved.
    :param context: Additional data to be passed to the condition.

    :return: An array filtered by the specified condition. Its number of elements is less than or equal to
        that of the original array.
    """
    child_condition = build_child_callback(condition, parent_parameters=["x", "index", "label", "context"])
    arguments = {
        "data": data,
        "condition": child_condition,
        "context": context,
    }
    return _process("array_filter", **arguments)
+ + + +
@openeo_process
def array_find(data, value, reverse=UNSET) -> ProcessBuilder:
    """
    Look up the index of a value in an array.

    :param data: List to find the value in.
    :param value: Value to find in `data`. For the value `null`, this process always returns `null`.
    :param reverse: The process finds the index of the first match by default. Set this flag to `true` to
        get the index of the last match instead.

    :return: The index of the first element with the specified value. `null` is returned if no element was
        found.
    """
    arguments = {"data": data, "value": value, "reverse": reverse}
    return _process("array_find", **arguments)
+ + + +
@openeo_process
def array_find_label(data, label) -> ProcessBuilder:
    """
    Look up the index of a label in a labeled array.

    :param data: List to find the label in.
    :param label: Label to find in `data`.

    :return: The index of the element that has the specified label assigned. `null` is returned if no such
        label was found.
    """
    arguments = {"data": data, "label": label}
    return _process("array_find_label", **arguments)
+ + + +
@openeo_process
def array_interpolate_linear(data) -> ProcessBuilder:
    """
    Interpolate an array linearly in one dimension.

    :param data: An array of numbers and no-data values. For a labeled array, the labels must have a
        natural/inherent label order and the process expects them to be sorted accordingly. This is the
        default behavior in openEO for spatial and temporal dimensions.

    :return: An array in which no-data values are replaced with interpolated values. The array stays the
        same if it does not hold at least 2 numerical values.
    """
    arguments = {"data": data}
    return _process("array_interpolate_linear", **arguments)
+ + + +
@openeo_process
def array_labels(data) -> ProcessBuilder:
    """
    Retrieve the labels of an array.

    :param data: An array.

    :return: The labels or indices as array.
    """
    arguments = {"data": data}
    return _process("array_labels", **arguments)
+ + + +
@openeo_process
def array_modify(data, values, index, length=UNSET) -> ProcessBuilder:
    """
    Modify an array by removing, inserting or updating elements.

    :param data: The array to modify.
    :param values: The values to insert into the `data` array.
    :param index: Index in the `data` array of the element before which the value(s) are inserted. The
        process throws an `ArrayElementNotAvailable` exception if the index is greater than the number of
        elements in the `data` array. There are two options to insert after the last element:

        1. Use the simpler processes ``array_append()`` to append a single value or ``array_concat()`` to
           append multiple values.
        2. Specify the number of elements in the array. The number of elements can be retrieved with the
           process ``count()``, having the parameter `condition` set to `true`.
    :param length: Number of elements in the `data` array to remove (or replace), starting from the given
        index. If the array holds fewer elements, the process simply removes all elements up to the end.

    :return: An array with values added, updated or removed.
    """
    arguments = {
        "data": data,
        "values": values,
        "index": index,
        "length": length,
    }
    return _process("array_modify", **arguments)
+ + + +
@openeo_process
def arsinh(x) -> ProcessBuilder:
    """
    Compute the inverse hyperbolic sine.

    :param x: A number.

    :return: The computed angle in radians.
    """
    arguments = {"x": x}
    return _process("arsinh", **arguments)
+ + + +
@openeo_process
def artanh(x) -> ProcessBuilder:
    """
    Compute the inverse hyperbolic tangent.

    :param x: A number.

    :return: The computed angle in radians.
    """
    arguments = {"x": x}
    return _process("artanh", **arguments)
+ + + +
@openeo_process
def atmospheric_correction(data, method, elevation_model=UNSET, options=UNSET) -> ProcessBuilder:
    """
    Correct a data cube for atmospheric effects.

    :param data: Data cube containing multi-spectral optical top of atmosphere reflectances to be
        corrected.
    :param method: The atmospheric correction method to use. A specific method has to be set to get
        reproducible results. With `null` the back-end chooses, which improves portability but reduces
        reproducibility, as you *may* get different results if you run the processes multiple times.
    :param elevation_model: The digital elevation model to use. With `null` (the default) the back-end
        chooses, which improves portability but reduces reproducibility.
    :param options: Proprietary options for the atmospheric correction method. Specifying proprietary
        options reduces portability.

    :return: Data cube containing bottom of atmosphere reflectances.
    """
    arguments = {
        "data": data,
        "method": method,
        "elevation_model": elevation_model,
        "options": options,
    }
    return _process("atmospheric_correction", **arguments)
+ + + +
@openeo_process
def between(x, min, max, exclude_max=UNSET) -> ProcessBuilder:
    """
    Check whether a value lies between two boundaries.

    :param x: The value to check.
    :param min: The lower boundary (inclusive) to check against.
    :param max: The upper boundary (inclusive) to check against.
    :param exclude_max: Set to `true` to exclude the upper boundary `max`. Defaults to `false`.

    :return: `true` if `x` is between the specified bounds, otherwise `false`.
    """
    arguments = {
        "x": x,
        "min": min,
        "max": max,
        "exclude_max": exclude_max,
    }
    return _process("between", **arguments)
+ + + +
@openeo_process
def ceil(x) -> ProcessBuilder:
    """
    Round a fractional number up.

    :param x: The number to round up.

    :return: The number rounded up.
    """
    arguments = {"x": x}
    return _process("ceil", **arguments)
+ + + +
@openeo_process
def climatological_normal(data, period, climatology_period=UNSET) -> ProcessBuilder:
    """
    Compute climatology normals

    :param data: A data cube with exactly one temporal dimension. The data cube must span at least the
        temporal interval specified in the parameter `climatology_period`. Seasonal periods may span two
        consecutive years, e.g. temporal winter that includes months December, January and February. If
        the required months before the actual climate period are available, the season is taken into
        account. If not available, the first season is not taken into account and the seasonal mean is
        based on one year less than the other seasonal normals. The incomplete season at the end of the
        last year is never taken into account.
    :param period: The time intervals to aggregate the average value for. The following pre-defined
        frequencies are supported:

        * `day`: Day of the year
        * `month`: Month of the year
        * `climatology-period`: The period specified in the parameter `climatology_period`.
        * `season`: Three month periods of the calendar seasons (December - February, March - May, June -
          August, September - November).
        * `tropical-season`: Six month periods of the tropical seasons (November - April, May - October).
    :param climatology_period: The climatology period as a closed temporal interval. The first element of
        the array is the first year to be fully included in the temporal interval. The second element is
        the last year to be fully included in the temporal interval. The default climatology period is
        from 1981 until 2010 (both inclusive) right now, but this might be updated over time to what is
        commonly used in climatology. If you want your research to stay reproducible, please explicitly
        specify a period.

    :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference
        system and resolution) remain unchanged, except for the resolution and dimension labels of the
        temporal dimension. The temporal dimension has the following dimension labels:

        * `day`: `001` - `365`
        * `month`: `01` - `12`
        * `climatology-period`: `climatology-period`
        * `season`: `djf` (December - February), `mam` (March - May), `jja` (June - August), `son`
          (September - November)
        * `tropical-season`: `ndjfma` (November - April), `mjjaso` (May - October)
    """
    return _process(
        "climatological_normal",
        data=data,
        period=period,
        climatology_period=climatology_period,
    )
+ + + +
@openeo_process
def clip(x, min, max) -> ProcessBuilder:
    """
    Limit a value to the range between a minimum and a maximum.

    :param x: A number.
    :param min: The minimum value. When the value is lower than this, the process returns the value of
        this parameter.
    :param max: The maximum value. When the value is greater than this, the process returns the value of
        this parameter.

    :return: The value clipped to the specified range.
    """
    arguments = {"x": x, "min": min, "max": max}
    return _process("clip", **arguments)
+ + + +
@openeo_process
def cloud_detection(data, method, options=UNSET) -> ProcessBuilder:
    """
    Produce cloud masks for a data cube.

    :param data: The source data cube containing multi-spectral optical top of the atmosphere (TOA)
        reflectances on which cloud detection is performed.
    :param method: The cloud detection method to use. A specific method has to be set to get reproducible
        results. With `null` the back-end chooses, which improves portability but reduces reproducibility,
        as you *may* get different results if you run the processes multiple times.
    :param options: Proprietary options for the cloud detection method. Specifying proprietary options
        reduces portability.

    :return: A data cube with bands for the atmospheric disturbances. Each of the masks holds values
        between 0 and 1. The data cube has the same spatial and temporal dimensions as the source data cube
        and a dimension that contains a dimension label for each of the supported/considered atmospheric
        disturbance.
    """
    arguments = {"data": data, "method": method, "options": options}
    return _process("cloud_detection", **arguments)
+ + + +
@openeo_process
def constant(x) -> ProcessBuilder:
    """
    Declare a constant value.

    :param x: The value of the constant.

    :return: The value of the constant.
    """
    arguments = {"x": x}
    return _process("constant", **arguments)
+ + + +
@openeo_process
def cos(x) -> ProcessBuilder:
    """
    Compute the cosine.

    :param x: An angle in radians.

    :return: The computed cosine of `x`.
    """
    arguments = {"x": x}
    return _process("cos", **arguments)
+ + + +
@openeo_process
def cosh(x) -> ProcessBuilder:
    """
    Compute the hyperbolic cosine.

    :param x: An angle in radians.

    :return: The computed hyperbolic cosine of `x`.
    """
    arguments = {"x": x}
    return _process("cosh", **arguments)
+ + + +
@openeo_process
def count(data, condition=UNSET, context=UNSET) -> ProcessBuilder:
    """
    Count how many elements an array holds.

    :param data: An array with elements of any data type.
    :param condition: A condition made of one or more processes that return a boolean value in the end. It
        is evaluated against each element in the array, and an element is counted only if the condition
        returns `true`. By default the valid elements in a list are counted (see ``is_valid()``). Setting
        this parameter to boolean `true` counts all elements in the list. `false` is not a valid value for
        this parameter.
    :param context: Additional data to be passed to the condition.

    :return: The counted number of elements.
    """
    arguments = {"data": data, "condition": condition, "context": context}
    return _process("count", **arguments)
+ + + +
@openeo_process
def create_data_cube() -> ProcessBuilder:
    """
    Build an empty data cube.

    :return: An empty data cube with no dimensions.
    """
    process_id = "create_data_cube"
    return _process(process_id)
+ + + +
@openeo_process
def cummax(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Compute cumulative maxima.

    :param data: An array of numbers.
    :param ignore_nodata: Whether no-data values are ignored; they are ignored by default. With this flag
        set to `false`, no-data values are considered, so that `null` is set for all the following
        elements.

    :return: An array with the computed cumulative maxima.
    """
    arguments = {"data": data, "ignore_nodata": ignore_nodata}
    return _process("cummax", **arguments)
+ + + +
@openeo_process
def cummin(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Compute cumulative minima.

    :param data: An array of numbers.
    :param ignore_nodata: Whether no-data values are ignored; they are ignored by default. With this flag
        set to `false`, no-data values are considered, so that `null` is set for all the following
        elements.

    :return: An array with the computed cumulative minima.
    """
    arguments = {"data": data, "ignore_nodata": ignore_nodata}
    return _process("cummin", **arguments)
+ + + +
@openeo_process
def cumproduct(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Compute cumulative products.

    :param data: An array of numbers.
    :param ignore_nodata: Whether no-data values are ignored; they are ignored by default. With this flag
        set to `false`, no-data values are considered, so that `null` is set for all the following
        elements.

    :return: An array with the computed cumulative products.
    """
    arguments = {"data": data, "ignore_nodata": ignore_nodata}
    return _process("cumproduct", **arguments)
+ + + +
@openeo_process
def cumsum(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Compute cumulative sums.

    :param data: An array of numbers.
    :param ignore_nodata: Whether no-data values are ignored; they are ignored by default. With this flag
        set to `false`, no-data values are considered, so that `null` is set for all the following
        elements.

    :return: An array with the computed cumulative sums.
    """
    arguments = {"data": data, "ignore_nodata": ignore_nodata}
    return _process("cumsum", **arguments)
+ + + +
@openeo_process
def date_between(x, min, max, exclude_max=UNSET) -> ProcessBuilder:
    """
    Check whether a date or time lies between two boundaries.

    :param x: The value to check.
    :param min: The lower boundary (inclusive) to check against.
    :param max: The upper boundary (inclusive) to check against.
    :param exclude_max: Set to `true` to exclude the upper boundary `max`. Defaults to `false`.

    :return: `true` if `x` is between the specified bounds, otherwise `false`.
    """
    arguments = {
        "x": x,
        "min": min,
        "max": max,
        "exclude_max": exclude_max,
    }
    return _process("date_between", **arguments)
+ + + +
@openeo_process
def date_difference(date1, date2, unit=UNSET) -> ProcessBuilder:
    """
    Compute the difference between two time instants.

    :param date1: The base date, optionally with a time component.
    :param date2: The other date, optionally with a time component.
    :param unit: The unit of the returned value. Available units:

        - millisecond
        - second - leap seconds are ignored in computations.
        - minute
        - hour
        - day
        - month
        - year

    :return: The difference between date1 and date2 in the given unit (seconds by default), including a
        fractional part if required. For comparison purposes this means:

        - If `date1` < `date2`, the returned value is positive.
        - If `date1` = `date2`, the returned value is 0.
        - If `date1` > `date2`, the returned value is negative.
    """
    arguments = {"date1": date1, "date2": date2, "unit": unit}
    return _process("date_difference", **arguments)
+ + + +
@openeo_process
def date_shift(date, value, unit) -> ProcessBuilder:
    """
    Manipulates dates and times by addition or subtraction

    :param date: The date (and optionally time) to manipulate. If the given date doesn't include the time,
        the process assumes that the time component is `00:00:00Z` (i.e. midnight, in UTC). The millisecond
        part of the time is optional and defaults to `0` if not given.
    :param value: The period of time in the unit given that is added (positive numbers) or subtracted
        (negative numbers). The value `0` doesn't have any effect.
    :param unit: The unit for the value given. The following pre-defined units are available:

        - millisecond: Milliseconds
        - second: Seconds - leap seconds are ignored in computations.
        - minute: Minutes
        - hour: Hours
        - day: Days - changes only the day part of a date
        - week: Weeks (equivalent to 7 days)
        - month: Months
        - year: Years

        Manipulations with the unit `year`, `month`, `week` or `day` never change the time. If any of the
        manipulations result in an invalid date or time, the corresponding part is rounded down to the
        next valid date or time respectively. For example, adding a month to `2020-01-31` would result in
        `2020-02-29`.

    :return: The manipulated date. If a time component was given in the parameter `date`, the time
        component is returned with the date.
    """
    return _process("date_shift", date=date, value=value, unit=unit)
+ + + +
@openeo_process
def dimension_labels(data, dimension) -> ProcessBuilder:
    """
    Retrieve the labels of a dimension.

    :param data: The data cube.
    :param dimension: Name of the dimension whose labels are retrieved.

    :return: The labels as an array.
    """
    arguments = {"data": data, "dimension": dimension}
    return _process("dimension_labels", **arguments)
+ + + +
@openeo_process
def divide(x, y) -> ProcessBuilder:
    """
    Divide one number by another.

    :param x: The dividend.
    :param y: The divisor.

    :return: The computed result.
    """
    arguments = {"x": x, "y": y}
    return _process("divide", **arguments)
+ + + +
@openeo_process
def drop_dimension(data, name) -> ProcessBuilder:
    """
    Drop a dimension from a data cube.

    :param data: The data cube to drop a dimension from.
    :param name: Name of the dimension to drop.

    :return: A data cube without the specified dimension. The number of dimensions decreases by one, while
        the dimension properties (name, type, labels, reference system and resolution) of all other
        dimensions remain unchanged.
    """
    arguments = {"data": data, "name": name}
    return _process("drop_dimension", **arguments)
+ + + +
@openeo_process
def e() -> ProcessBuilder:
    """
    Provide Euler's number (e).

    :return: The numerical value of Euler's number.
    """
    process_id = "e"
    return _process(process_id)
+ + + +
@openeo_process
def eq(x, y, delta=UNSET, case_sensitive=UNSET) -> ProcessBuilder:
    """
    Equal to comparison

    :param x: First operand.
    :param y: Second operand.
    :param delta: Only applicable for comparing two numbers. If this optional parameter is set to a
        positive non-zero number the equality of two numbers is checked against a delta value. This is
        especially useful to circumvent problems with floating-point inaccuracy in machine-based
        computation. This option is basically an alias for the following computation:
        `lte(abs(minus([x, y])), delta)`
    :param case_sensitive: Only applicable for comparing two strings. Case sensitive comparison can be
        disabled by setting this parameter to `false`.

    :return: `true` if `x` is equal to `y`, `null` if any operand is `null`, otherwise `false`.
    """
    return _process("eq", x=x, y=y, delta=delta, case_sensitive=case_sensitive)
+ + + +
@openeo_process
def exp(p) -> ProcessBuilder:
    """
    Raise e to a given power.

    :param p: The numerical exponent.

    :return: The computed value for *e* raised to the power of `p`.
    """
    arguments = {"p": p}
    return _process("exp", **arguments)
+ + + +
@openeo_process
def extrema(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Compute the minimum and the maximum value.

    :param data: An array of numbers.
    :param ignore_nodata: Whether no-data values are ignored; they are ignored by default. With this flag
        set to `false`, no-data values are considered, so that an array with two `null` values is returned
        if any value is such a value.

    :return: An array holding the minimum and maximum values for the specified numbers: the first element
        is the minimum, the second element is the maximum. Both elements are set to `null` if the input
        array is empty.
    """
    arguments = {"data": data, "ignore_nodata": ignore_nodata}
    return _process("extrema", **arguments)
+ + + +
@openeo_process
def filter_bands(data, bands=UNSET, wavelengths=UNSET) -> ProcessBuilder:
    """
    Filter the bands by names

    :param data: A data cube with bands.
    :param bands: A list of band names. Each name is either the unique band name (metadata field `name` in
        bands) or one of the common band names (metadata field `common_name` in bands). If the unique band
        name and the common name conflict, the unique band name has a higher priority. The order of the
        specified array defines the order of the bands in the data cube. If multiple bands match a common
        name, all matched bands are included in the original order.
    :param wavelengths: A list of sub-lists, each consisting of two elements: the minimum wavelength first
        and the maximum wavelength second. Wavelengths are specified in micrometers (μm). The order of the
        specified array defines the order of the bands in the data cube. If multiple bands match the
        wavelengths, all matched bands are included in the original order.

    :return: A data cube limited to a subset of its original bands. The dimensions and dimension properties
        (name, type, labels, reference system and resolution) remain unchanged, except that the dimension of
        type `bands` has fewer (or the same) dimension labels.
    """
    arguments = {'data': data, 'bands': bands, 'wavelengths': wavelengths}
    return _process('filter_bands', **arguments)
+ + + +
@openeo_process
def filter_bbox(data, extent) -> ProcessBuilder:
    """
    Spatial filter using a bounding box

    :param data: A data cube.
    :param extent: A bounding box, which may include a vertical axis (see `base` and `height`).

    :return: A data cube restricted to the bounding box. The dimensions and dimension properties (name, type,
        labels, reference system and resolution) remain unchanged, except that the spatial dimensions have
        fewer (or the same) dimension labels.
    """
    arguments = {'data': data, 'extent': extent}
    return _process('filter_bbox', **arguments)
+ + + +
@openeo_process
def filter_labels(data, condition, dimension, context=UNSET) -> ProcessBuilder:
    """
    Filter dimension labels based on a condition

    :param data: A data cube.
    :param condition: A condition that is evaluated against each dimension label in the specified dimension.
        A dimension label and the corresponding data are preserved for the given dimension if the condition
        returns `true`.
    :param dimension: The name of the dimension to filter on. Fails with a `DimensionNotAvailable` exception
        if the specified dimension does not exist.
    :param context: Additional data to be passed to the condition.

    :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference
        system and resolution) remain unchanged, except that the given dimension has fewer (or the same)
        dimension labels.
    """
    # Wrap the condition as a child callback with the parent parameters `value` and `context`.
    condition_callback = build_child_callback(condition, parent_parameters=['value', 'context'])
    return _process(
        'filter_labels',
        data=data,
        condition=condition_callback,
        dimension=dimension,
        context=context,
    )
+ + + +
@openeo_process
def filter_spatial(data, geometries) -> ProcessBuilder:
    """
    Spatial filter raster data cubes using geometries

    :param data: A raster data cube.
    :param geometries: One or more geometries used for filtering, given as GeoJSON or vector data cube. If
        multiple geometries are provided, their union is used. Empty geometries are ignored. The data cube is
        limited to the bounding box of the given geometries. No implicit masking gets applied; to mask the
        pixels of the data cube use ``mask_polygon()``.

    :return: A raster data cube restricted to the specified geometries. The dimensions and dimension
        properties (name, type, labels, reference system and resolution) remain unchanged, except that the
        spatial dimensions have fewer (or the same) dimension labels.
    """
    arguments = {'data': data, 'geometries': geometries}
    return _process('filter_spatial', **arguments)
+ + + +
@openeo_process
def filter_temporal(data, extent, dimension=UNSET) -> ProcessBuilder:
    """
    Temporal filter based on temporal intervals

    :param data: A data cube.
    :param extent: Left-closed temporal interval, i.e. an array with exactly two elements:

        1. The first element is the start of the temporal interval. The specified time instant is
           **included** in the interval.
        2. The second element is the end of the temporal interval. The specified time instant is
           **excluded** from the interval.

        The second element must always be greater/later than the first element. Otherwise, a
        `TemporalExtentEmpty` exception is thrown. Unbounded intervals are also supported by setting one of
        the boundaries to `null`, but never both.
    :param dimension: The name of the temporal dimension to filter on. If no specific dimension is specified,
        the filter applies to all temporal dimensions. Fails with a `DimensionNotAvailable` exception if the
        specified dimension does not exist.

    :return: A data cube restricted to the specified temporal extent. The dimensions and dimension properties
        (name, type, labels, reference system and resolution) remain unchanged, except that the temporal
        dimensions (determined by the `dimension` parameter) may have fewer dimension labels.
    """
    arguments = {'data': data, 'extent': extent, 'dimension': dimension}
    return _process('filter_temporal', **arguments)
+ + + +
@openeo_process
def filter_vector(data, geometries, relation=UNSET) -> ProcessBuilder:
    """
    Spatial vector filter using geometries

    :param data: A vector data cube with the candidate geometries.
    :param geometries: One or more base geometries used for filtering, given as vector data cube. If multiple
        base geometries are provided, their union is used.
    :param relation: The spatial filter predicate for comparing the geometries provided through (a)
        `geometries` (base geometries) and (b) `data` (candidate geometries).

    :return: A vector data cube restricted to the specified geometries. The dimensions and dimension
        properties (name, type, labels, reference system and resolution) remain unchanged, except that the
        geometries dimension has fewer (or the same) dimension labels.
    """
    arguments = {'data': data, 'geometries': geometries, 'relation': relation}
    return _process('filter_vector', **arguments)
+ + + +
@openeo_process
def first(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    First element

    :param data: An array with elements of any data type.
    :param ignore_nodata: Indicates whether no-data values are ignored or not. They are ignored by default.
        With this flag set to `false`, no-data values are considered, so that `null` is returned if the first
        value is a no-data value.

    :return: The first element of the input array.
    """
    arguments = {'data': data, 'ignore_nodata': ignore_nodata}
    return _process('first', **arguments)
+ + + +
@openeo_process
def fit_curve(data, parameters, function, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Curve fitting

    :param data: A labeled array, the labels correspond to the variable `y` and the values correspond to the
        variable `x`.
    :param parameters: Defines the number of parameters for the model function and provides an initial guess
        for them. At least one parameter is required.
    :param function: The model function. It must take the parameters to fit as an array through the first
        argument and the independent variable `x` as the second argument. It is recommended to store the
        model function as a user-defined process on the back-end, so that it can be re-used afterwards with
        the computed optimal values for the parameters.
    :param ignore_nodata: Indicates whether no-data values are ignored or not. They are ignored by default.
        With this flag set to `false`, no-data values are considered, so that `null` is passed to the model
        function.

    :return: An array with the optimal values for the parameters.
    """
    # Wrap the model function as a child callback with the parent parameters `x` and `parameters`.
    function_callback = build_child_callback(function, parent_parameters=['x', 'parameters'])
    return _process(
        'fit_curve',
        data=data,
        parameters=parameters,
        function=function_callback,
        ignore_nodata=ignore_nodata,
    )
+ + + +
@openeo_process
def flatten_dimensions(data, dimensions, target_dimension, label_separator=UNSET) -> ProcessBuilder:
    """
    Combine multiple dimensions into a single dimension

    :param data: A data cube.
    :param dimensions: The names of the dimensions to combine. The order of the array defines the order in
        which the dimension labels and values are combined (see the example in the process description).
        Fails with a `DimensionNotAvailable` exception if at least one of the specified dimensions does not
        exist.
    :param target_dimension: The name of the new target dimension. A new dimension will be created with the
        given name and type `other` (see ``add_dimension()``). Fails with a `TargetDimensionExists` exception
        if a dimension with the specified name exists.
    :param label_separator: The string that will be used as a separator for the concatenated dimension
        labels. To unambiguously revert the dimension labels with the process ``unflatten_dimension()``, the
        given string must not be contained in any of the dimension labels.

    :return: A data cube with the new shape. The dimension properties (name, type, labels, reference system
        and resolution) for all other dimensions remain unchanged.
    """
    arguments = {
        'data': data,
        'dimensions': dimensions,
        'target_dimension': target_dimension,
        'label_separator': label_separator,
    }
    return _process('flatten_dimensions', **arguments)
+ + + +
@openeo_process
def floor(x) -> ProcessBuilder:
    """
    Round fractions down

    :param x: The number to round down.

    :return: The number rounded down.
    """
    arguments = {'x': x}
    return _process('floor', **arguments)
+ + + +
@openeo_process
def gt(x, y) -> ProcessBuilder:
    """
    Greater than comparison

    :param x: First operand.
    :param y: Second operand.

    :return: `true` if `x` is strictly greater than `y`, `null` if any operand is `null`, otherwise `false`.
    """
    arguments = {'x': x, 'y': y}
    return _process('gt', **arguments)
+ + + +
@openeo_process
def gte(x, y) -> ProcessBuilder:
    """
    Greater than or equal to comparison

    :param x: First operand.
    :param y: Second operand.

    :return: `true` if `x` is greater than or equal to `y`, `null` if any operand is `null`, otherwise
        `false`.
    """
    arguments = {'x': x, 'y': y}
    return _process('gte', **arguments)
+ + + +
@openeo_process
def if_(value, accept, reject=UNSET) -> ProcessBuilder:
    """
    If-Then-Else conditional

    :param value: A boolean value.
    :param accept: The value that is returned if the boolean value is `true`.
    :param reject: The value that is returned if the boolean value is **not** `true`. Defaults to `null`.

    :return: Either the `accept` or the `reject` argument, depending on the given boolean value.
    """
    # The process id is `if`; only the Python function name carries the trailing underscore.
    arguments = {'value': value, 'accept': accept, 'reject': reject}
    return _process('if', **arguments)
+ + + +
@openeo_process
def inspect(data, message=UNSET, code=UNSET, level=UNSET) -> ProcessBuilder:
    """
    Add information to the logs

    :param data: Data to log.
    :param message: A message to send in addition to the data.
    :param code: A label that helps to identify one or more log entries originating from this process in the
        list of all log entries. It can help to group or filter log entries and is usually not unique.
    :param level: The severity level of this message, defaults to `info`.

    :return: The data as passed to the `data` parameter, without any modification.
    """
    arguments = {'data': data, 'message': message, 'code': code, 'level': level}
    return _process('inspect', **arguments)
+ + + +
@openeo_process
def int(x) -> ProcessBuilder:
    """
    Integer part of a number

    :param x: A number.

    :return: The integer part of the number.
    """
    arguments = {'x': x}
    return _process('int', **arguments)
+ + + +
@openeo_process
def is_infinite(x) -> ProcessBuilder:
    """
    Value is an infinite number

    :param x: The data to check.

    :return: `true` if the data is an infinite number, otherwise `false`.
    """
    arguments = {'x': x}
    return _process('is_infinite', **arguments)
+ + + +
@openeo_process
def is_nan(x) -> ProcessBuilder:
    """
    Value is not a number

    :param x: The data to check.

    :return: `true` for `NaN` and all non-numeric data types, otherwise `false`.
    """
    arguments = {'x': x}
    return _process('is_nan', **arguments)
+ + + +
@openeo_process
def is_nodata(x) -> ProcessBuilder:
    """
    Value is a no-data value

    :param x: The data to check.

    :return: `true` if the data is a no-data value, otherwise `false`.
    """
    arguments = {'x': x}
    return _process('is_nodata', **arguments)
+ + + +
@openeo_process
def is_valid(x) -> ProcessBuilder:
    """
    Value is valid data

    :param x: The data to check.

    :return: `true` if the data is valid, otherwise `false`.
    """
    arguments = {'x': x}
    return _process('is_valid', **arguments)
+ + + +
@openeo_process
def last(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Last element

    :param data: An array with elements of any data type.
    :param ignore_nodata: Indicates whether no-data values are ignored or not. They are ignored by default.
        With this flag set to `false`, no-data values are considered, so that `null` is returned if the last
        value is a no-data value.

    :return: The last element of the input array.
    """
    arguments = {'data': data, 'ignore_nodata': ignore_nodata}
    return _process('last', **arguments)
+ + + +
@openeo_process
def linear_scale_range(x, inputMin, inputMax, outputMin=UNSET, outputMax=UNSET) -> ProcessBuilder:
    """
    Linear transformation between two ranges

    :param x: A number to transform. The number gets clipped to the bounds specified in `inputMin` and
        `inputMax`.
    :param inputMin: Minimum value the input can obtain.
    :param inputMax: Maximum value the input can obtain.
    :param outputMin: Minimum value of the desired output range.
    :param outputMax: Maximum value of the desired output range.

    :return: The transformed number.
    """
    arguments = {
        'x': x,
        'inputMin': inputMin,
        'inputMax': inputMax,
        'outputMin': outputMin,
        'outputMax': outputMax,
    }
    return _process('linear_scale_range', **arguments)
+ + + +
@openeo_process
def ln(x) -> ProcessBuilder:
    """
    Natural logarithm

    :param x: The number to compute the natural logarithm for.

    :return: The computed natural logarithm.
    """
    arguments = {'x': x}
    return _process('ln', **arguments)
+ + + +
@openeo_process
def load_collection(id, spatial_extent, temporal_extent, bands=UNSET, properties=UNSET) -> ProcessBuilder:
    """
    Load a collection

    :param id: The collection id.
    :param spatial_extent: Limits the data to load from the collection to the specified bounding box or
        polygons.

        * For raster data, the process loads the pixel into the data cube if the point at the pixel center
          intersects with the bounding box or any of the polygons (as defined in the Simple Features standard
          by the OGC).
        * For vector data, the process loads the geometry into the data cube if the geometry is fully
          *within* the bounding box or any of the polygons (as defined in the Simple Features standard by the
          OGC). Empty geometries may only be in the data cube if no spatial extent has been provided.

        The GeoJSON can be one of the following feature types:

        * A `Polygon` or `MultiPolygon` geometry,
        * a `Feature` with a `Polygon` or `MultiPolygon` geometry, or
        * a `FeatureCollection` containing at least one `Feature` with `Polygon` or `MultiPolygon`
          geometries.
        * Empty geometries are ignored.

        Set this parameter to `null` to set no limit for the spatial extent. Be careful with this when
        loading large datasets! It is recommended to use this parameter instead of using ``filter_bbox()`` or
        ``filter_spatial()`` directly after loading unbounded data.
    :param temporal_extent: Limits the data to load from the collection to the specified left-closed temporal
        interval. Applies to all temporal dimensions. The interval has to be specified as an array with
        exactly two elements:

        1. The first element is the start of the temporal interval. The specified time instant is
           **included** in the interval.
        2. The second element is the end of the temporal interval. The specified time instant is
           **excluded** from the interval.

        The second element must always be greater/later than the first element. Otherwise, a
        `TemporalExtentEmpty` exception is thrown. Unbounded intervals are also supported by setting one of
        the boundaries to `null`, but never both. Set this parameter to `null` to set no limit for the
        temporal extent. Be careful with this when loading large datasets! It is recommended to use this
        parameter instead of using ``filter_temporal()`` directly after loading unbounded data.
    :param bands: Only adds the specified bands into the data cube, so that bands that don't match the list
        of band names are not available. Applies to all dimensions of type `bands`. Either the unique band
        name (metadata field `name` in bands) or one of the common band names (metadata field `common_name`
        in bands) can be specified. If the unique band name and the common name conflict, the unique band
        name has a higher priority. The order of the specified array defines the order of the bands in the
        data cube. If multiple bands match a common name, all matched bands are included in the original
        order. It is recommended to use this parameter instead of using ``filter_bands()`` directly after
        loading unbounded data.
    :param properties: Limits the data by metadata properties to include only data in the data cube for which
        all given conditions return `true` (AND operation). Specify key-value-pairs with the key being the
        name of the metadata property, which can be retrieved with the openEO Data Discovery for Collections.
        The value must be a condition (user-defined process) to be evaluated against the collection metadata,
        see the example.

    :return: A data cube for further processing. The dimensions and dimension properties (name, type, labels,
        reference system and resolution) correspond to the collection's metadata, but the dimension labels
        are restricted as specified in the parameters.
    """
    arguments = {
        'id': id,
        'spatial_extent': spatial_extent,
        'temporal_extent': temporal_extent,
        'bands': bands,
        'properties': properties,
    }
    return _process('load_collection', **arguments)
+ + + +
@openeo_process
def load_geojson(data, properties=UNSET) -> ProcessBuilder:
    """
    Converts GeoJSON into a vector data cube

    :param data: A GeoJSON object to convert into a vector data cube. The GeoJSON type `GeometryCollection`
        is not supported. Each geometry in the GeoJSON data results in a dimension label in the `geometries`
        dimension.
    :param properties: A list of properties from the GeoJSON file to construct an additional dimension from.
        A new dimension with the name `properties` and type `other` is created if at least one property is
        provided. Only applies for GeoJSON Features and FeatureCollections. Missing values are generally set
        to no-data (`null`). Depending on the number of properties provided, the process creates the
        dimension differently:

        - Single property with scalar values: A single dimension label with the name of the property and a
          single value per geometry.
        - Single property of type array: The dimension labels correspond to the array indices. There are as
          many values and labels per geometry as there are for the largest array.
        - Multiple properties with scalar values: The dimension labels correspond to the property names.
          There are as many values and labels per geometry as there are properties provided here.

    :return: A vector data cube containing the geometries, either one or two dimensional.
    """
    arguments = {'data': data, 'properties': properties}
    return _process('load_geojson', **arguments)
+ + + +
@openeo_process
def load_ml_model(id) -> ProcessBuilder:
    """
    Load a ML model

    :param id: The STAC Item to load the machine learning model from. The STAC Item must implement the
        `ml-model` extension.

    :return: A machine learning model to be used with machine learning processes such as
        ``predict_random_forest()``.
    """
    arguments = {'id': id}
    return _process('load_ml_model', **arguments)
+ + + +
@openeo_process
def load_result(id, spatial_extent=UNSET, temporal_extent=UNSET, bands=UNSET) -> ProcessBuilder:
    """
    Load batch job results

    :param id: The id of a batch job with results.
    :param spatial_extent: Limits the data to load from the batch job result to the specified bounding box or
        polygons.

        * For raster data, the process loads the pixel into the data cube if the point at the pixel center
          intersects with the bounding box or any of the polygons (as defined in the Simple Features standard
          by the OGC).
        * For vector data, the process loads the geometry into the data cube if the geometry is fully within
          the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).
          Empty geometries may only be in the data cube if no spatial extent has been provided.

        The GeoJSON can be one of the following feature types:

        * A `Polygon` or `MultiPolygon` geometry,
        * a `Feature` with a `Polygon` or `MultiPolygon` geometry, or
        * a `FeatureCollection` containing at least one `Feature` with `Polygon` or `MultiPolygon`
          geometries.

        Set this parameter to `null` to set no limit for the spatial extent. Be careful with this when
        loading large datasets! It is recommended to use this parameter instead of using ``filter_bbox()`` or
        ``filter_spatial()`` directly after loading unbounded data.
    :param temporal_extent: Limits the data to load from the batch job result to the specified left-closed
        temporal interval. Applies to all temporal dimensions. The interval has to be specified as an array
        with exactly two elements:

        1. The first element is the start of the temporal interval. The specified instance in time is
           **included** in the interval.
        2. The second element is the end of the temporal interval. The specified instance in time is
           **excluded** from the interval.

        The specified temporal strings follow [RFC 3339](https://www.rfc-editor.org/rfc/rfc3339.html). Open
        intervals are also supported by setting one of the boundaries to `null`, but never both. Set this
        parameter to `null` to set no limit for the temporal extent. Be careful with this when loading large
        datasets! It is recommended to use this parameter instead of using ``filter_temporal()`` directly
        after loading unbounded data.
    :param bands: Only adds the specified bands into the data cube, so that bands that don't match the list
        of band names are not available. Applies to all dimensions of type `bands`. Either the unique band
        name (metadata field `name` in bands) or one of the common band names (metadata field `common_name`
        in bands) can be specified. If the unique band name and the common name conflict, the unique band
        name has a higher priority. The order of the specified array defines the order of the bands in the
        data cube. If multiple bands match a common name, all matched bands are included in the original
        order. It is recommended to use this parameter instead of using ``filter_bands()`` directly after
        loading unbounded data.

    :return: A data cube for further processing.
    """
    arguments = {
        'id': id,
        'spatial_extent': spatial_extent,
        'temporal_extent': temporal_extent,
        'bands': bands,
    }
    return _process('load_result', **arguments)
+ + + +
@openeo_process
def load_stac(url, spatial_extent=UNSET, temporal_extent=UNSET, bands=UNSET, properties=UNSET) -> ProcessBuilder:
    """
    Loads data from STAC

    :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific
        STAC API Collection that allows to filter items and to download assets. This includes batch job
        results, which themselves are compliant with STAC. For external URLs, authentication details such as
        API keys or tokens may need to be included in the URL. Batch job results can be specified in two
        ways:

        - For batch job results at the same back-end, a URL pointing to the corresponding batch job results
          endpoint should be provided. The URL usually ends with `/jobs/{id}/results` and `{id}` is the
          corresponding batch job ID.
        - For external results, a signed URL must be provided. Not all back-ends support signed URLs, which
          are provided as a link with the link relation `canonical` in the batch job result metadata.
    :param spatial_extent: Limits the data to load to the specified bounding box or polygons.

        * For raster data, the process loads the pixel into the data cube if the point at the pixel center
          intersects with the bounding box or any of the polygons (as defined in the Simple Features standard
          by the OGC).
        * For vector data, the process loads the geometry into the data cube if the geometry is fully within
          the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).
          Empty geometries may only be in the data cube if no spatial extent has been provided.

        The GeoJSON can be one of the following feature types:

        * A `Polygon` or `MultiPolygon` geometry,
        * a `Feature` with a `Polygon` or `MultiPolygon` geometry, or
        * a `FeatureCollection` containing at least one `Feature` with `Polygon` or `MultiPolygon`
          geometries.

        Set this parameter to `null` to set no limit for the spatial extent. Be careful with this when
        loading large datasets! It is recommended to use this parameter instead of using ``filter_bbox()`` or
        ``filter_spatial()`` directly after loading unbounded data.
    :param temporal_extent: Limits the data to load to the specified left-closed temporal interval. Applies
        to all temporal dimensions. The interval has to be specified as an array with exactly two elements:

        1. The first element is the start of the temporal interval. The specified instance in time is
           **included** in the interval.
        2. The second element is the end of the temporal interval. The specified instance in time is
           **excluded** from the interval.

        The second element must always be greater/later than the first element. Otherwise, a
        `TemporalExtentEmpty` exception is thrown. Open intervals are also supported by setting one of the
        boundaries to `null`, but never both. Set this parameter to `null` to set no limit for the temporal
        extent. Be careful with this when loading large datasets! It is recommended to use this parameter
        instead of using ``filter_temporal()`` directly after loading unbounded data.
    :param bands: Only adds the specified bands into the data cube, so that bands that don't match the list
        of band names are not available. Applies to all dimensions of type `bands`. Either the unique band
        name (metadata field `name` in bands) or one of the common band names (metadata field `common_name`
        in bands) can be specified. If the unique band name and the common name conflict, the unique band
        name has a higher priority. The order of the specified array defines the order of the bands in the
        data cube. If multiple bands match a common name, all matched bands are included in the original
        order. It is recommended to use this parameter instead of using ``filter_bands()`` directly after
        loading unbounded data.
    :param properties: Limits the data by metadata properties to include only data in the data cube for which
        all given conditions return `true` (AND operation). Specify key-value-pairs with the key being the
        name of the metadata property, which can be retrieved with the openEO Data Discovery for Collections.
        The value must be a condition (user-defined process) to be evaluated against a STAC API. This
        parameter is not supported for static STAC.

    :return: A data cube for further processing.
    """
    arguments = {
        'url': url,
        'spatial_extent': spatial_extent,
        'temporal_extent': temporal_extent,
        'bands': bands,
        'properties': properties,
    }
    return _process('load_stac', **arguments)
+ + + +
@openeo_process
def load_uploaded_files(paths, format, options=UNSET) -> ProcessBuilder:
    """
    Load files from the user workspace

    :param paths: The files to read. Folders can't be specified, specify all files instead. An exception is
        thrown if a file can't be read.
    :param format: The file format to read from. It must be one of the values that the server reports as
        supported input file formats, which usually correspond to the short GDAL/OGR codes. A
        `FormatUnsuitable` exception is thrown if the format is not suitable for loading the data. This
        parameter is *case insensitive*.
    :param options: The file format parameters to be used to read the files. Must correspond to the
        parameters that the server reports as supported parameters for the chosen `format`. The parameter
        names and valid values usually correspond to the GDAL/OGR format options.

    :return: A data cube for further processing.
    """
    arguments = {'paths': paths, 'format': format, 'options': options}
    return _process('load_uploaded_files', **arguments)
+ + + +
@openeo_process
def load_url(url, format, options=UNSET) -> ProcessBuilder:
    """
    Load data from a URL

    :param url: The URL to read from. Authentication details such as API keys or tokens may need to be
        included in the URL.
    :param format: The file format to use when loading the data. It must be one of the values that the server
        reports as supported input file formats, which usually correspond to the short GDAL/OGR codes. A
        `FormatUnsuitable` exception is thrown if the format is not suitable for loading the data. This
        parameter is *case insensitive*.
    :param options: The file format parameters to use when reading the data. Must correspond to the
        parameters that the server reports as supported parameters for the chosen `format`. The parameter
        names and valid values usually correspond to the GDAL/OGR format options.

    :return: A data cube for further processing.
    """
    arguments = {'url': url, 'format': format, 'options': options}
    return _process('load_url', **arguments)
+ + + +
@openeo_process
def log(x, base) -> ProcessBuilder:
    """
    Logarithm to a base

    :param x: The number to compute the logarithm for.
    :param base: The numerical base.

    :return: The computed logarithm.
    """
    arguments = {'x': x, 'base': base}
    return _process('log', **arguments)
+ + + +
@openeo_process
def lt(x, y) -> ProcessBuilder:
    """
    Less than comparison

    :param x: First operand.
    :param y: Second operand.

    :return: `true` if `x` is strictly less than `y`, `null` if any operand is `null`, otherwise `false`.
    """
    arguments = {'x': x, 'y': y}
    return _process('lt', **arguments)
+ + + +
@openeo_process
def lte(x, y) -> ProcessBuilder:
    """
    Less than or equal to comparison

    :param x: First operand.
    :param y: Second operand.

    :return: `true` if `x` is less than or equal to `y`, `null` if any operand is `null`, otherwise `false`.
    """
    arguments = {'x': x, 'y': y}
    return _process('lte', **arguments)
+ + + +
@openeo_process
def mask(data, mask, replacement=UNSET) -> ProcessBuilder:
    """
    Apply a raster mask

    :param data: A raster data cube.
    :param mask: A mask as a raster data cube. Every pixel in `data` must have a corresponding element in
        `mask`.
    :param replacement: The value used to replace masked values with.

    :return: A masked raster data cube with the same dimensions. The dimension properties (name, type,
        labels, reference system and resolution) remain unchanged.
    """
    arguments = {'data': data, 'mask': mask, 'replacement': replacement}
    return _process('mask', **arguments)
+ + + +
@openeo_process
def mask_polygon(data, mask, replacement=UNSET, inside=UNSET) -> ProcessBuilder:
    """
    Apply a polygon mask

    :param data: A raster data cube.
    :param mask: A GeoJSON object or a vector data cube containing at least one polygon. The provided vector
        data can be one of the following:

        * A `Polygon` or `MultiPolygon` geometry,
        * a `Feature` with a `Polygon` or `MultiPolygon` geometry, or
        * a `FeatureCollection` containing at least one `Feature` with `Polygon` or `MultiPolygon`
          geometries.
        * Empty geometries are ignored.
    :param replacement: The value used to replace masked values with.
    :param inside: If set to `true`, all pixels for which the point at the pixel center **does** intersect
        with any polygon are replaced.

    :return: A masked raster data cube with the same dimensions. The dimension properties (name, type,
        labels, reference system and resolution) remain unchanged.
    """
    arguments = {'data': data, 'mask': mask, 'replacement': replacement, 'inside': inside}
    return _process('mask_polygon', **arguments)
+ + + +
@openeo_process
def max(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Maximum value

    :param data: An array of numbers.
    :param ignore_nodata: Indicates whether no-data values are ignored or not. They are ignored by default.
        With this flag set to `false`, no-data values are considered, so that `null` is returned if any value
        is a no-data value.

    :return: The maximum value.
    """
    arguments = {'data': data, 'ignore_nodata': ignore_nodata}
    return _process('max', **arguments)
+ + + +
@openeo_process
def mean(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Arithmetic mean (average)

    :param data: An array of numbers.
    :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default.
        Setting this flag to `false` considers no-data values so that `null` is returned if any value
        is such a value.

    :return: The computed arithmetic mean.
    """
    arguments = {'data': data, 'ignore_nodata': ignore_nodata}
    return _process('mean', **arguments)
+ + + +
@openeo_process
def median(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Statistical median

    :param data: An array of numbers.
    :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default.
        Setting this flag to `false` considers no-data values so that `null` is returned if any value
        is such a value.

    :return: The computed statistical median.
    """
    arguments = {'data': data, 'ignore_nodata': ignore_nodata}
    return _process('median', **arguments)
+ + + +
@openeo_process
def merge_cubes(cube1, cube2, overlap_resolver=UNSET, context=UNSET) -> ProcessBuilder:
    """
    Merge two data cubes

    :param cube1: The base data cube.
    :param cube2: The other data cube to be merged with the base data cube.
    :param overlap_resolver: A reduction operator that resolves the conflict if the data overlaps. The
        reducer must return a value of the same data type as the input values are. The reduction
        operator may be a single process such as ``multiply()`` or consist of multiple sub-processes.
        `null` (the default) can be specified if no overlap resolver is required.
    :param context: Additional data to be passed to the overlap resolver.

    :return: The merged data cube. See the process description for details regarding the dimensions and
        dimension properties (name, type, labels, reference system and resolution).
    """
    # Compare against the two "no resolver" sentinels by identity. A membership test such as
    # `x not in [None, UNSET]` falls back to `==`, which would run whatever `__eq__` the resolver
    # object defines and could misclassify a real resolver as "not given".
    if overlap_resolver is not None and overlap_resolver is not UNSET:
        overlap_resolver = build_child_callback(overlap_resolver, parent_parameters=['x', 'y', 'context'])
    return _process(
        'merge_cubes',
        cube1=cube1,
        cube2=cube2,
        overlap_resolver=overlap_resolver,
        context=context,
    )
+ + + +
@openeo_process
def min(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Minimum value

    :param data: An array of numbers.
    :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default.
        Setting this flag to `false` considers no-data values so that `null` is returned if any value
        is such a value.

    :return: The minimum value.
    """
    arguments = {'data': data, 'ignore_nodata': ignore_nodata}
    return _process('min', **arguments)
+ + + +
@openeo_process
def mod(x, y) -> ProcessBuilder:
    """
    Modulo

    :param x: A number to be used as the dividend.
    :param y: A number to be used as the divisor.

    :return: The remainder after division.
    """
    operands = {'x': x, 'y': y}
    return _process('mod', **operands)
+ + + +
@openeo_process
def multiply(x, y) -> ProcessBuilder:
    """
    Multiplication of two numbers

    :param x: The multiplier.
    :param y: The multiplicand.

    :return: The computed product of the two numbers.
    """
    operands = {'x': x, 'y': y}
    return _process('multiply', **operands)
+ + + +
@openeo_process
def nan() -> ProcessBuilder:
    """
    Not a Number (NaN)

    :return: Returns `NaN`.
    """
    # The process takes no arguments, so only the process id is passed.
    return _process('nan')
+ + + +
@openeo_process
def ndvi(data, nir=UNSET, red=UNSET, target_band=UNSET) -> ProcessBuilder:
    """
    Normalized Difference Vegetation Index

    :param data: A raster data cube with two bands that have the common names `red` and `nir` assigned.
    :param nir: The name of the NIR band. Defaults to the band that has the common name `nir` assigned.
        Either the unique band name (metadata field `name` in bands) or one of the common band names
        (metadata field `common_name` in bands) can be specified. If the unique band name and the
        common name conflict, the unique band name has a higher priority.
    :param red: The name of the red band. Defaults to the band that has the common name `red` assigned.
        Either the unique band name (metadata field `name` in bands) or one of the common band names
        (metadata field `common_name` in bands) can be specified. If the unique band name and the
        common name conflict, the unique band name has a higher priority.
    :param target_band: By default, the dimension of type `bands` is dropped. To keep the dimension
        specify a new band name in this parameter so that a new dimension label with the specified name
        will be added for the computed values.

    :return: A raster data cube containing the computed NDVI values. The structure of the data cube
        differs depending on the value passed to `target_band`:

        * `target_band` is `null`: The data cube does not contain the dimension of type `bands`, the
          number of dimensions decreases by one. The dimension properties (name, type, labels,
          reference system and resolution) for all other dimensions remain unchanged.
        * `target_band` is a string: The data cube keeps the same dimensions. The dimension properties
          remain unchanged, but the number of dimension labels for the dimension of type `bands`
          increases by one. The additional label is named as specified in `target_band`.
    """
    arguments = {
        'data': data,
        'nir': nir,
        'red': red,
        'target_band': target_band,
    }
    return _process('ndvi', **arguments)
+ + + +
@openeo_process
def neq(x, y, delta=UNSET, case_sensitive=UNSET) -> ProcessBuilder:
    """
    Not equal to comparison

    :param x: First operand.
    :param y: Second operand.
    :param delta: Only applicable for comparing two numbers. If this optional parameter is set to a
        positive non-zero number the non-equality of two numbers is checked against a delta value. This
        is especially useful to circumvent problems with floating-point inaccuracy in machine-based
        computation. This option is basically an alias for the following computation:
        `gt(abs(minus([x, y])), delta)`
    :param case_sensitive: Only applicable for comparing two strings. Case sensitive comparison can be
        disabled by setting this parameter to `false`.

    :return: `true` if `x` is *not* equal to `y`, `null` if any operand is `null`, otherwise `false`.
    """
    arguments = {
        'x': x,
        'y': y,
        'delta': delta,
        'case_sensitive': case_sensitive,
    }
    return _process('neq', **arguments)
+ + + +
@openeo_process
def normalized_difference(x, y) -> ProcessBuilder:
    """
    Normalized difference

    :param x: The value for the first band.
    :param y: The value for the second band.

    :return: The computed normalized difference.
    """
    operands = {'x': x, 'y': y}
    return _process('normalized_difference', **operands)
+ + + +
@openeo_process
def not_(x) -> ProcessBuilder:
    """
    Inverting a boolean

    :param x: Boolean value to invert.

    :return: Inverted boolean value.
    """
    # The Python name carries a trailing underscore; the process id passed on is plain 'not'.
    arguments = {'x': x}
    return _process('not', **arguments)
+ + + +
@openeo_process
def or_(x, y) -> ProcessBuilder:
    """
    Logical OR

    :param x: A boolean value.
    :param y: A boolean value.

    :return: Boolean result of the logical OR.
    """
    # The Python name carries a trailing underscore; the process id passed on is plain 'or'.
    operands = {'x': x, 'y': y}
    return _process('or', **operands)
+ + + +
@openeo_process
def order(data, asc=UNSET, nodata=UNSET) -> ProcessBuilder:
    """
    Get the order of array elements

    :param data: An array to compute the order for.
    :param asc: The default sort order is ascending, with smallest values first. To sort in reverse
        (descending) order, set this parameter to `false`.
    :param nodata: Controls the handling of no-data values (`null`). By default, they are removed. If
        set to `true`, missing values in the data are put last; if set to `false`, they are put first.

    :return: The computed permutation.
    """
    arguments = {'data': data, 'asc': asc, 'nodata': nodata}
    return _process('order', **arguments)
+ + + +
@openeo_process
def pi() -> ProcessBuilder:
    """
    Pi (π)

    :return: The numerical value of Pi.
    """
    # The process takes no arguments, so only the process id is passed.
    return _process('pi')
+ + + +
@openeo_process
def power(base, p) -> ProcessBuilder:
    """
    Exponentiation

    :param base: The numerical base.
    :param p: The numerical exponent.

    :return: The computed value for `base` raised to the power of `p`.
    """
    arguments = {'base': base, 'p': p}
    return _process('power', **arguments)
+ + + +
@openeo_process
def predict_curve(parameters, function, dimension, labels=UNSET) -> ProcessBuilder:
    """
    Predict values

    :param parameters: A data cube with optimal values, e.g. computed by the process ``fit_curve()``.
    :param function: The model function. It must take the parameters to fit as array through the first
        argument and the independent variable `x` as the second argument. It is recommended to store
        the model function as a user-defined process on the back-end.
    :param dimension: The name of the dimension for predictions.
    :param labels: The labels to predict values for. If no labels are given, predicts values only for
        no-data (`null`) values in the data cube.

    :return: A data cube with the predicted values with the provided dimension `dimension` having as
        many labels as provided through `labels`.
    """
    # The model function is always wrapped as a child callback with parent parameters `x`, `parameters`.
    model_callback = build_child_callback(function, parent_parameters=['x', 'parameters'])
    arguments = {
        'parameters': parameters,
        'function': model_callback,
        'dimension': dimension,
        'labels': labels,
    }
    return _process('predict_curve', **arguments)
+ + + +
@openeo_process
def predict_random_forest(data, model) -> ProcessBuilder:
    """
    Predict values based on a Random Forest model

    :param data: An array of numbers.
    :param model: A model object that can be trained with the processes ``fit_regr_random_forest()``
        (regression) and ``fit_class_random_forest()`` (classification).

    :return: The predicted value. Returns `null` if any of the given values in the array is a no-data
        value.
    """
    arguments = {'data': data, 'model': model}
    return _process('predict_random_forest', **arguments)
+ + + +
@openeo_process
def product(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Compute the product by multiplying numbers

    :param data: An array of numbers.
    :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default.
        Setting this flag to `false` considers no-data values so that `null` is returned if any value
        is such a value.

    :return: The computed product of the sequence of numbers.
    """
    arguments = {'data': data, 'ignore_nodata': ignore_nodata}
    return _process('product', **arguments)
+ + + +
@openeo_process
def quantiles(data, probabilities=UNSET, q=UNSET, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Quantiles

    :param data: An array of numbers.
    :param probabilities: Quantiles to calculate. Either a list of probabilities or the number of
        intervals:

        * Provide an array with a sorted list of probabilities in ascending order to calculate
          quantiles for. The probabilities must be between 0 and 1 (inclusive). If not sorted in
          ascending order, an `AscendingProbabilitiesRequired` exception is thrown.
        * Provide an integer to specify the number of intervals to calculate quantiles for. Calculates
          q-quantiles with equal-sized intervals.
    :param q: Number of intervals to calculate quantiles for. Calculates q-quantiles with equal-sized
        intervals. This parameter has been **deprecated**. Please use the parameter `probabilities`
        instead.
    :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default.
        Setting this flag to `false` considers no-data values so that an array with `null` values is
        returned if any element is such a value.

    :return: An array with the computed quantiles. The list has either as many elements as the given
        list of `probabilities` had or *`q`-1* elements. If the input array is empty the resulting
        array is filled with as many `null` values as required according to the rule above. See the
        'Empty array' example for an example.
    """
    arguments = {
        'data': data,
        'probabilities': probabilities,
        'q': q,
        'ignore_nodata': ignore_nodata,
    }
    return _process('quantiles', **arguments)
+ + + +
@openeo_process
def rearrange(data, order) -> ProcessBuilder:
    """
    Sort an array based on a permutation

    :param data: The array to rearrange.
    :param order: The permutation used for rearranging.

    :return: The rearranged array.
    """
    arguments = {'data': data, 'order': order}
    return _process('rearrange', **arguments)
+ + + +
@openeo_process
def reduce_dimension(data, reducer, dimension, context=UNSET) -> ProcessBuilder:
    """
    Reduce dimensions

    :param data: A data cube.
    :param reducer: A reducer to apply on the specified dimension. A reducer is a single process such
        as ``mean()`` or a set of processes, which computes a single value for a list of values, see
        the category 'reducer' for such processes.
    :param dimension: The name of the dimension over which to reduce. Fails with a
        `DimensionNotAvailable` exception if the specified dimension does not exist.
    :param context: Additional data to be passed to the reducer.

    :return: A data cube with the newly computed values. It is missing the given dimension, the number
        of dimensions decreases by one. The dimension properties (name, type, labels, reference system
        and resolution) for all other dimensions remain unchanged.
    """
    # The reducer is always wrapped as a child callback with parent parameters `data` and `context`.
    reducer_callback = build_child_callback(reducer, parent_parameters=['data', 'context'])
    arguments = {
        'data': data,
        'reducer': reducer_callback,
        'dimension': dimension,
        'context': context,
    }
    return _process('reduce_dimension', **arguments)
+ + + +
@openeo_process
def reduce_spatial(data, reducer, context=UNSET) -> ProcessBuilder:
    """
    Reduce spatial dimensions 'x' and 'y'

    :param data: A raster data cube.
    :param reducer: A reducer to apply on the horizontal spatial dimensions. A reducer is a single
        process such as ``mean()`` or a set of processes, which computes a single value for a list of
        values, see the category 'reducer' for such processes.
    :param context: Additional data to be passed to the reducer.

    :return: A data cube with the newly computed values. It is missing the horizontal spatial
        dimensions, the number of dimensions decreases by two. The dimension properties (name, type,
        labels, reference system and resolution) for all other dimensions remain unchanged.
    """
    # The reducer is always wrapped as a child callback with parent parameters `data` and `context`.
    reducer_callback = build_child_callback(reducer, parent_parameters=['data', 'context'])
    arguments = {
        'data': data,
        'reducer': reducer_callback,
        'context': context,
    }
    return _process('reduce_spatial', **arguments)
+ + + +
@openeo_process
def rename_dimension(data, source, target) -> ProcessBuilder:
    """
    Rename a dimension

    :param data: The data cube.
    :param source: The current name of the dimension. Fails with a `DimensionNotAvailable` exception if
        the specified dimension does not exist.
    :param target: A new name for the dimension. Fails with a `DimensionExists` exception if a
        dimension with the specified name exists.

    :return: A data cube with the same dimensions, but the name of one of the dimensions changes. The
        old name can not be referred to any longer. The dimension properties (name, type, labels,
        reference system and resolution) remain unchanged.
    """
    arguments = {'data': data, 'source': source, 'target': target}
    return _process('rename_dimension', **arguments)
+ + + +
@openeo_process
def rename_labels(data, dimension, target, source=UNSET) -> ProcessBuilder:
    """
    Rename dimension labels

    :param data: The data cube.
    :param dimension: The name of the dimension to rename the labels for.
    :param target: The new names for the labels. If a target dimension label already exists in the data
        cube, a `LabelExists` exception is thrown.
    :param source: The original names of the labels to be renamed to corresponding array elements in
        the parameter `target`. It is allowed to only specify a subset of labels to rename, as long as
        the `target` and `source` parameter have the same length. The order of the labels doesn't need
        to match the order of the dimension labels in the data cube. By default, the array is empty so
        that the dimension labels in the data cube are expected to be enumerated. If the dimension
        labels are not enumerated and the given array is empty, the `LabelsNotEnumerated` exception is
        thrown. If one of the source dimension labels doesn't exist, the `LabelNotAvailable` exception
        is thrown.

    :return: The data cube with the same dimensions. The dimension properties (name, type, labels,
        reference system and resolution) remain unchanged, except that for the given dimension the
        labels change. The old labels can not be referred to any longer. The number of labels remains
        the same.
    """
    arguments = {
        'data': data,
        'dimension': dimension,
        'target': target,
        'source': source,
    }
    return _process('rename_labels', **arguments)
+ + + +
@openeo_process
def resample_cube_spatial(data, target, method=UNSET) -> ProcessBuilder:
    """
    Resample the spatial dimensions to match a target data cube

    :param data: A raster data cube.
    :param target: A raster data cube that describes the spatial target resolution.
    :param method: Resampling method to use. The following options are available and are meant to align
        with [`gdalwarp`](https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r):

        * `average`: average (mean) resampling, computes the weighted average of all valid pixels
        * `bilinear`: bilinear resampling
        * `cubic`: cubic resampling
        * `cubicspline`: cubic spline resampling
        * `lanczos`: Lanczos windowed sinc resampling
        * `max`: maximum resampling, selects the maximum value from all valid pixels
        * `med`: median resampling, selects the median value of all valid pixels
        * `min`: minimum resampling, selects the minimum value from all valid pixels
        * `mode`: mode resampling, selects the value which appears most often of all the sampled points
        * `near`: nearest neighbour resampling (default)
        * `q1`: first quartile resampling, selects the first quartile value of all valid pixels
        * `q3`: third quartile resampling, selects the third quartile value of all valid pixels
        * `rms`: root mean square (quadratic mean) of all valid pixels
        * `sum`: compute the weighted sum of all valid pixels

        Valid pixels are determined based on the function ``is_valid()``.

    :return: A raster data cube with the same dimensions. The dimension properties (name, type, labels,
        reference system and resolution) remain unchanged, except for the resolution and dimension
        labels of the spatial dimensions.
    """
    arguments = {'data': data, 'target': target, 'method': method}
    return _process('resample_cube_spatial', **arguments)
+ + + +
@openeo_process
def resample_cube_temporal(data, target, dimension=UNSET, valid_within=UNSET) -> ProcessBuilder:
    """
    Resample temporal dimensions to match a target data cube

    :param data: A data cube with one or more temporal dimensions.
    :param target: A data cube that describes the temporal target resolution.
    :param dimension: The name of the temporal dimension to resample, which must exist with this name
        in both data cubes. If the dimension is not set or is set to `null`, the process resamples all
        temporal dimensions that exist with the same names in both data cubes. The following exceptions
        may occur:

        * A dimension is given, but it does not exist in any of the data cubes:
          `DimensionNotAvailable`
        * A dimension is given, but one of them is not temporal: `DimensionMismatch`
        * No specific dimension name is given and there are no temporal dimensions with the same name
          in the data: `DimensionMismatch`
    :param valid_within: Setting this parameter to a numerical value enables that the process searches
        for valid values within the given period of days before and after the target timestamps. Valid
        values are determined based on the function ``is_valid()``. For example, the limit of `7` for
        the target timestamps `2020-01-15 12:00:00` looks for a nearest neighbor after
        `2020-01-08 12:00:00` and before `2020-01-22 12:00:00`. If no valid value is found within the
        given period, the value will be set to no-data (`null`).

    :return: A data cube with the same dimensions and the same dimension properties (name, type, labels,
        reference system and resolution) for all non-temporal dimensions. For the temporal dimension,
        the name and type remain unchanged, but the dimension labels, resolution and reference system
        may change.
    """
    arguments = {
        'data': data,
        'target': target,
        'dimension': dimension,
        'valid_within': valid_within,
    }
    return _process('resample_cube_temporal', **arguments)
+ + + +
@openeo_process
def resample_spatial(data, resolution=UNSET, projection=UNSET, method=UNSET, align=UNSET) -> ProcessBuilder:
    """
    Resample and warp the spatial dimensions

    :param data: A raster data cube.
    :param resolution: Resamples the data cube to the target resolution, which can be specified either
        as separate values for x and y or as a single value for both axes. Specified in the units of
        the target projection. Doesn't change the resolution by default (`0`).
    :param projection: Warps the data cube to the target projection, specified as
        [EPSG code](http://www.epsg-registry.org/) or
        [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). By default
        (`null`), the projection is not changed.
    :param method: Resampling method to use. The following options are available and are meant to align
        with [`gdalwarp`](https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r):

        * `average`: average (mean) resampling, computes the weighted average of all valid pixels
        * `bilinear`: bilinear resampling
        * `cubic`: cubic resampling
        * `cubicspline`: cubic spline resampling
        * `lanczos`: Lanczos windowed sinc resampling
        * `max`: maximum resampling, selects the maximum value from all valid pixels
        * `med`: median resampling, selects the median value of all valid pixels
        * `min`: minimum resampling, selects the minimum value from all valid pixels
        * `mode`: mode resampling, selects the value which appears most often of all the sampled points
        * `near`: nearest neighbour resampling (default)
        * `q1`: first quartile resampling, selects the first quartile value of all valid pixels
        * `q3`: third quartile resampling, selects the third quartile value of all valid pixels
        * `rms`: root mean square (quadratic mean) of all valid pixels
        * `sum`: compute the weighted sum of all valid pixels

        Valid pixels are determined based on the function ``is_valid()``.
    :param align: Specifies to which corner of the spatial extent the new resampled data is aligned to.

    :return: A raster data cube with values warped onto the new projection. It has the same dimensions
        and the same dimension properties (name, type, labels, reference system and resolution) for all
        non-spatial or vertical spatial dimensions. For the horizontal spatial dimensions the name and
        type remain unchanged, but reference system, labels and resolution may change depending on the
        given parameters.
    """
    arguments = {
        'data': data,
        'resolution': resolution,
        'projection': projection,
        'method': method,
        'align': align,
    }
    return _process('resample_spatial', **arguments)
+ + + +
@openeo_process
def round(x, p=UNSET) -> ProcessBuilder:
    """
    Round to a specified precision

    :param x: A number to round.
    :param p: A positive number specifies the number of digits after the decimal point to round to. A
        negative number means rounding to a power of ten, so for example *-2* rounds to the nearest
        hundred. Defaults to *0*.

    :return: The rounded number.
    """
    arguments = {'x': x, 'p': p}
    return _process('round', **arguments)
+ + + +
@openeo_process
def run_udf(data, udf, runtime, version=UNSET, context=UNSET) -> ProcessBuilder:
    """
    Run a UDF

    :param data: The data to be passed to the UDF.
    :param udf: Either source code, an absolute URL or a path to a UDF script.
    :param runtime: A UDF runtime identifier available at the back-end.
    :param version: A UDF runtime version. If set to `null`, the default runtime version specified for
        each runtime is used.
    :param context: Additional data such as configuration options to be passed to the UDF.

    :return: The data processed by the UDF. The returned value can be of any data type and is exactly
        what the UDF code returns.
    """
    arguments = {
        'data': data,
        'udf': udf,
        'runtime': runtime,
        'version': version,
        'context': context,
    }
    return _process('run_udf', **arguments)
+ + + +
@openeo_process
def run_udf_externally(data, url, context=UNSET) -> ProcessBuilder:
    """
    Run an externally hosted UDF container

    :param data: The data to be passed to the UDF.
    :param url: Absolute URL to a remote UDF service.
    :param context: Additional data such as configuration options to be passed to the UDF.

    :return: The data processed by the UDF. The returned value can in principle be of any data type,
        but it depends on what is returned by the UDF code. Please see the implemented UDF interface
        for details.
    """
    arguments = {'data': data, 'url': url, 'context': context}
    return _process('run_udf_externally', **arguments)
+ + + +
@openeo_process
def sar_backscatter(data, coefficient=UNSET, elevation_model=UNSET, mask=UNSET, contributing_area=UNSET, local_incidence_angle=UNSET, ellipsoid_incidence_angle=UNSET, noise_removal=UNSET, options=UNSET) -> ProcessBuilder:
    """
    Computes backscatter from SAR input

    :param data: The source data cube containing SAR input.
    :param coefficient: Select the radiometric correction coefficient. The following options are
        available:

        * `beta0`: radar brightness
        * `sigma0-ellipsoid`: ground area computed with ellipsoid earth model
        * `sigma0-terrain`: ground area computed with terrain earth model
        * `gamma0-ellipsoid`: ground area computed with ellipsoid earth model in sensor line of sight
        * `gamma0-terrain`: ground area computed with terrain earth model in sensor line of sight
          (default)
        * `null`: non-normalized backscatter
    :param elevation_model: The digital elevation model to use. Set to `null` (the default) to allow
        the back-end to choose, which will improve portability, but reduce reproducibility.
    :param mask: If set to `true`, a data mask is added to the bands with the name `mask`. It indicates
        which values are valid (1), invalid (0) or contain no-data (null).
    :param contributing_area: If set to `true`, a DEM-based local contributing area band named
        `contributing_area` is added. The values are given in square meters.
    :param local_incidence_angle: If set to `true`, a DEM-based local incidence angle band named
        `local_incidence_angle` is added. The values are given in degrees.
    :param ellipsoid_incidence_angle: If set to `true`, an ellipsoidal incidence angle band named
        `ellipsoid_incidence_angle` is added. The values are given in degrees.
    :param noise_removal: If set to `false`, no noise removal is applied. Defaults to `true`, which
        removes noise.
    :param options: Proprietary options for the backscatter computations. Specifying proprietary
        options will reduce portability.

    :return: Backscatter values corresponding to the chosen parametrization. The values are given in
        linear scale.
    """
    arguments = {
        'data': data,
        'coefficient': coefficient,
        'elevation_model': elevation_model,
        'mask': mask,
        'contributing_area': contributing_area,
        'local_incidence_angle': local_incidence_angle,
        'ellipsoid_incidence_angle': ellipsoid_incidence_angle,
        'noise_removal': noise_removal,
        'options': options,
    }
    return _process('sar_backscatter', **arguments)
+ + + +
@openeo_process
def save_result(data, format, options=UNSET) -> ProcessBuilder:
    """
    Save processed data

    :param data: The data to deliver in the given file format.
    :param format: The file format to use. It must be one of the values that the server reports as
        supported output file formats, which usually correspond to the short GDAL/OGR codes. This
        parameter is *case insensitive*. If the data cube is empty and the file format can't store
        empty data cubes, a `DataCubeEmpty` exception is thrown. If the file format is otherwise not
        suitable for storing the underlying data structure, a `FormatUnsuitable` exception is thrown.
    :param options: The file format parameters to be used to create the file(s). Must correspond to the
        parameters that the server reports as supported parameters for the chosen `format`. The
        parameter names and valid values usually correspond to the GDAL/OGR format options.

    :return: Always returns `true` as in case of an error an exception is thrown which aborts the
        execution of the process.
    """
    arguments = {'data': data, 'format': format, 'options': options}
    return _process('save_result', **arguments)
+ + + +
@openeo_process
def sd(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Standard deviation

    :param data: An array of numbers.
    :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default.
        Setting this flag to `false` considers no-data values so that `null` is returned if any value
        is such a value.

    :return: The computed sample standard deviation.
    """
    arguments = {'data': data, 'ignore_nodata': ignore_nodata}
    return _process('sd', **arguments)
+ + + +
@openeo_process
def sgn(x) -> ProcessBuilder:
    """
    Signum

    :param x: A number.

    :return: The computed signum value of `x`.
    """
    arguments = {'x': x}
    return _process('sgn', **arguments)
+ + + +
@openeo_process
def sin(x) -> ProcessBuilder:
    """
    Sine

    :param x: An angle in radians.

    :return: The computed sine of `x`.
    """
    arguments = {'x': x}
    return _process('sin', **arguments)
+ + + +
@openeo_process
def sinh(x) -> ProcessBuilder:
    """
    Hyperbolic sine

    :param x: An angle in radians.

    :return: The computed hyperbolic sine of `x`.
    """
    arguments = {'x': x}
    return _process('sinh', **arguments)
+ + + +
@openeo_process
def sort(data, asc=UNSET, nodata=UNSET) -> ProcessBuilder:
    """
    Sort data

    :param data: An array with data to sort.
    :param asc: The default sort order is ascending, with smallest values first. To sort in reverse
        (descending) order, set this parameter to `false`.
    :param nodata: Controls the handling of no-data values (`null`). By default, they are removed. If
        set to `true`, missing values in the data are put last; if set to `false`, they are put first.

    :return: The sorted array.
    """
    arguments = {'data': data, 'asc': asc, 'nodata': nodata}
    return _process('sort', **arguments)
+ + + +
@openeo_process
def sqrt(x) -> ProcessBuilder:
    """
    Square root

    :param x: A number.

    :return: The computed square root.
    """
    arguments = {'x': x}
    return _process('sqrt', **arguments)
+ + + +
@openeo_process
def subtract(x, y) -> ProcessBuilder:
    """
    Subtraction of two numbers

    :param x: The minuend.
    :param y: The subtrahend.

    :return: The computed result.
    """
    operands = {'x': x, 'y': y}
    return _process('subtract', **operands)
+ + + +
@openeo_process
def sum(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Compute the sum by adding up numbers

    :param data: An array of numbers.
    :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default.
        Setting this flag to `false` considers no-data values so that `null` is returned if any value
        is such a value.

    :return: The computed sum of the sequence of numbers.
    """
    arguments = {'data': data, 'ignore_nodata': ignore_nodata}
    return _process('sum', **arguments)
+ + + +
@openeo_process
def tan(x) -> ProcessBuilder:
    """
    Tangent

    :param x: An angle in radians.

    :return: The computed tangent of `x`.
    """
    arguments = {'x': x}
    return _process('tan', **arguments)
+ + + +
@openeo_process
def tanh(x) -> ProcessBuilder:
    """
    Hyperbolic tangent

    :param x: An angle in radians.

    :return: The computed hyperbolic tangent of `x`.
    """
    arguments = {'x': x}
    return _process('tanh', **arguments)
+ + + +
@openeo_process
def text_begins(data, pattern, case_sensitive=UNSET) -> ProcessBuilder:
    """
    Text begins with another text

    :param data: Text in which to find something at the beginning.
    :param pattern: Text to find at the beginning of `data`. Regular expressions are not supported.
    :param case_sensitive: Case sensitive comparison can be disabled by setting this parameter to
        `false`.

    :return: `true` if `data` begins with `pattern`, `false` otherwise.
    """
    arguments = {
        'data': data,
        'pattern': pattern,
        'case_sensitive': case_sensitive,
    }
    return _process('text_begins', **arguments)
+ + + +
@openeo_process
def text_concat(data, separator=UNSET) -> ProcessBuilder:
    """
    Concatenate elements to a single text

    :param data: A set of elements. Numbers, boolean values and null values get converted to their
        (lower case) string representation. For example: `1` (integer), `-1.5` (number), `true` /
        `false` (boolean values)
    :param separator: A separator to put between each of the individual texts. Defaults to an empty
        string.

    :return: A string containing a string representation of all the array elements in the same order,
        with the separator between each element.
    """
    arguments = {'data': data, 'separator': separator}
    return _process('text_concat', **arguments)
+ + + +
@openeo_process
def text_contains(data, pattern, case_sensitive=UNSET) -> ProcessBuilder:
    """
    Text contains another text

    :param data: Text in which to find something.
    :param pattern: Text to find in `data`. Regular expressions are not supported.
    :param case_sensitive: Case sensitive comparison can be disabled by setting this parameter to `false`.

    :return: `true` if `data` contains the `pattern`, `false` otherwise.
    """
    # Forward all arguments by keyword to the generic `_process` helper under process id 'text_contains'.
    return _process('text_contains', data=data, pattern=pattern, case_sensitive=case_sensitive)
+ + + +
@openeo_process
def text_ends(data, pattern, case_sensitive=UNSET) -> ProcessBuilder:
    """
    Text ends with another text

    :param data: Text in which to find something at the end.
    :param pattern: Text to find at the end of `data`. Regular expressions are not supported.
    :param case_sensitive: Case sensitive comparison can be disabled by setting this parameter to `false`.

    :return: `true` if `data` ends with `pattern`, `false` otherwise.
    """
    # Forward all arguments by keyword to the generic `_process` helper under process id 'text_ends'.
    return _process('text_ends', data=data, pattern=pattern, case_sensitive=case_sensitive)
+ + + +
@openeo_process
def trim_cube(data) -> ProcessBuilder:
    """
    Remove dimension labels with no-data values

    :param data: A data cube to trim.

    :return: A trimmed data cube with the same dimensions. The dimension properties name, type, reference
        system and resolution remain unchanged. The number of dimension labels may decrease.
    """
    # Forward the argument by keyword to the generic `_process` helper under process id 'trim_cube'.
    return _process('trim_cube', data=data)
+ + + +
@openeo_process
def unflatten_dimension(data, dimension, target_dimensions, label_separator=UNSET) -> ProcessBuilder:
    """
    Split a single dimension into multiple dimensions

    :param data: A data cube that is consistently structured so that the operation can execute flawlessly (e.g.
        the dimension labels need to contain the `label_separator` exactly 1 time for two target dimensions, 2
        times for three target dimensions etc.).
    :param dimension: The name of the dimension to split.
    :param target_dimensions: The names of the new target dimensions. New dimensions will be created with the
        given names and type `other` (see ``add_dimension()``). Fails with a `TargetDimensionExists` exception if
        any of the dimensions exists. The order of the array defines the order in which the dimensions and
        dimension labels are added to the data cube (see the example in the process description).
    :param label_separator: The string that will be used as a separator to split the dimension labels.

    :return: A data cube with the new shape. The dimension properties (name, type, labels, reference system and
        resolution) for all other dimensions remain unchanged.
    """
    # Forward all arguments by keyword to the generic `_process` helper under process id 'unflatten_dimension'.
    return _process('unflatten_dimension', data=data, dimension=dimension, target_dimensions=target_dimensions, label_separator=label_separator)
+ + + +
@openeo_process
def variance(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Variance

    :param data: An array of numbers.
    :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. Setting
        this flag to `false` considers no-data values so that `null` is returned if any value is such a value.

    :return: The computed sample variance.
    """
    # Forward all arguments by keyword to the generic `_process` helper under process id 'variance'.
    return _process('variance', data=data, ignore_nodata=ignore_nodata)
+ + + +
@openeo_process
def vector_buffer(geometries, distance) -> ProcessBuilder:
    """
    Buffer geometries by distance

    :param geometries: Geometries to apply the buffer on. Feature properties are preserved.
    :param distance: The distance of the buffer in meters. A positive distance expands the geometries,
        resulting in outward buffering (dilation), while a negative distance shrinks the geometries, resulting in
        inward buffering (erosion). If the unit of the spatial reference system is not meters, a `UnitMismatch`
        error is thrown. Use ``vector_reproject()`` to convert the geometries to a suitable spatial reference
        system.

    :return: Returns a vector data cube with the computed new geometries of which some may be empty.
    """
    # Forward all arguments by keyword to the generic `_process` helper under process id 'vector_buffer'.
    return _process('vector_buffer', geometries=geometries, distance=distance)
+ + + +
@openeo_process
def vector_reproject(data, projection, dimension=UNSET) -> ProcessBuilder:
    """
    Reprojects the geometry dimension

    :param data: A vector data cube.
    :param projection: Coordinate reference system to reproject to. Specified as an [EPSG
        code](http://www.epsg-registry.org/) or [WKT2 CRS
        string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html).
    :param dimension: The name of the geometry dimension to reproject. If no specific dimension is specified,
        the filter applies to all geometry dimensions. Fails with a `DimensionNotAvailable` exception if the
        specified dimension does not exist.

    :return: A vector data cube with geometries projected to the new coordinate reference system. The reference
        system of the geometry dimension changes, all other dimensions and properties remain unchanged.
    """
    # Forward all arguments by keyword to the generic `_process` helper under process id 'vector_reproject'.
    return _process('vector_reproject', data=data, projection=projection, dimension=dimension)
+ + + +
@openeo_process
def vector_to_random_points(data, geometry_count=UNSET, total_count=UNSET, group=UNSET, seed=UNSET) -> ProcessBuilder:
    """
    Sample random points from geometries

    :param data: Input geometries for sample extraction.
    :param geometry_count: The maximum number of points to compute per geometry. Points in the input
        geometries can be selected only once by the sampling.
    :param total_count: The maximum number of points to compute overall. Throws a `CountMismatch` exception if
        the specified value is less than the provided number of geometries.
    :param group: Specifies whether the sampled points should be grouped by input geometry (default) or be
        generated as independent points. * If the sampled points are grouped, the process generates a `MultiPoint`
        per geometry given which keeps the original identifier if present. * Otherwise, each sampled point is
        generated as a distinct `Point` geometry without identifier.
    :param seed: A randomization seed to use for random sampling. If not given or `null`, no seed is used and
        results may differ on subsequent use.

    :return: Returns a vector data cube with the sampled points.
    """
    # Forward all arguments by keyword to the generic `_process` helper under process id 'vector_to_random_points'.
    return _process('vector_to_random_points', data=data, geometry_count=geometry_count, total_count=total_count, group=group, seed=seed)
+ + + +
@openeo_process
def vector_to_regular_points(data, distance, group=UNSET) -> ProcessBuilder:
    """
    Sample regular points from geometries

    :param data: Input geometries for sample extraction.
    :param distance: Defines the minimum distance in meters that is required between two samples generated
        *inside* a single geometry. If the unit of the spatial reference system is not meters, a `UnitMismatch`
        error is thrown. Use ``vector_reproject()`` to convert the geometries to a suitable spatial reference
        system. - For **polygons**, the distance defines the cell sizes of a regular grid that starts at the
        upper-left bound of each polygon. The centroid of each cell is then a sample point. If the centroid is not
        enclosed in the polygon, no point is sampled. If no point can be sampled for the geometry at all, the first
        coordinate of the geometry is returned as point. - For **lines** (line strings), the sampling starts with a
        point at the first coordinate of the line and then walks along the line and samples a new point each time
        the distance to the previous point has been reached again. - For **points**, the point is returned as
        given.
    :param group: Specifies whether the sampled points should be grouped by input geometry (default) or be
        generated as independent points. * If the sampled points are grouped, the process generates a `MultiPoint`
        per geometry given which keeps the original identifier if present. * Otherwise, each sampled point is
        generated as a distinct `Point` geometry without identifier.

    :return: Returns a vector data cube with the sampled points.
    """
    # Forward all arguments by keyword to the generic `_process` helper under process id 'vector_to_regular_points'.
    return _process('vector_to_regular_points', data=data, distance=distance, group=group)
+ + + +
@openeo_process
def xor(x, y) -> ProcessBuilder:
    """
    Logical XOR (exclusive or)

    :param x: A boolean value.
    :param y: A boolean value.

    :return: Boolean result of the logical XOR.
    """
    # Forward both arguments by keyword to the generic `_process` helper under process id 'xor'.
    return _process('xor', x=x, y=y)
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/rest/_datacube.html b/_modules/openeo/rest/_datacube.html new file mode 100644 index 000000000..1a7cdcd4d --- /dev/null +++ b/_modules/openeo/rest/_datacube.html @@ -0,0 +1,494 @@ + + + + + + + openeo.rest._datacube — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.rest._datacube

+from __future__ import annotations
+
+import logging
+import pathlib
+import re
+import typing
+import uuid
+import warnings
+from typing import Dict, List, Optional, Tuple, Union
+
+import requests
+
+from openeo.internal.graph_building import FlatGraphableMixin, PGNode, _FromNodeMixin
+from openeo.internal.jupyter import render_component
+from openeo.internal.processes.builder import (
+    convert_callable_to_pgnode,
+    get_parameter_names,
+)
+from openeo.internal.warnings import UserDeprecationWarning
+from openeo.rest import OpenEoClientException
+from openeo.util import dict_no_none, str_truncate
+
+if typing.TYPE_CHECKING:
+    # Imports for type checking only (circular import issue at runtime).
+    from openeo.rest.connection import Connection
+
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Sentinel object to refer to "current" cube in chained cube processing expressions.
# It is a unique plain object, meant to be recognised by identity (`value is THIS`), not by equality.
THIS = object()
+
+
class _ProcessGraphAbstraction(_FromNodeMixin, FlatGraphableMixin):
    """
    Common base for client-side wrappers around anything that is backed by an openEO process graph
    (raster data cubes, vector cubes, ML models, ...).

    An instance holds the result node of its process graph and the (optional) connection it was built with.
    """

    def __init__(self, pgnode: PGNode, connection: Union[Connection, None]):
        # TODO: now that connection can officially be None:
        #       improve exceptions in cases where is it still assumed to be a real connection (download, create_job, ...)
        self._connection = connection
        self._pg = pgnode

    def __str__(self):
        return f"{self.__class__.__name__}({self._pg})"

    def flat_graph(self) -> Dict[str, dict]:
        """
        Get the process graph in internal flat dict representation.

        .. warning:: This method is mainly intended for internal use.
            It is not recommended for general use and is *subject to change*.

            Instead, it is recommended to use
            :py:meth:`to_json()` or :py:meth:`print_json()`
            to obtain a standardized, interoperable JSON representation of the process graph.
            See :ref:`process_graph_export` for more information.
        """
        # TODO: wrap in {"process_graph":...} by default/optionally?
        flat = self._pg.flat_graph()
        return flat

    @property
    def _api_version(self):
        capabilities = self._connection.capabilities()
        return capabilities.api_version_check

    @property
    def connection(self) -> Connection:
        return self._connection

    def result_node(self) -> PGNode:
        """
        Get the current result node (:py:class:`PGNode`) of the process graph.

        .. versionadded:: 0.10.1
        """
        return self._pg

    def from_node(self):
        # _FromNodeMixin API
        return self._pg

    def _build_pgnode(
        self,
        process_id: str,
        arguments: Optional[dict] = None,
        namespace: Optional[str] = None,
        **kwargs
    ) -> PGNode:
        """
        Build a :py:class:`PGNode` from an argument dict and/or keyword arguments
        (keyword arguments win on key clashes),
        replacing top-level `THIS` values with this object.
        """
        merged = dict(arguments or {})
        merged.update(kwargs)
        # TODO: also necessary to traverse lists/dictionaries?
        resolved = {name: (self if value is THIS else value) for name, value in merged.items()}
        return PGNode(process_id=process_id, arguments=resolved, namespace=namespace)

    # TODO #278 also move process graph "execution" methods here: `download`, `execute`, `execute_batch`, `create_job`, `save_udf`,  ...

    def _repr_html_(self):
        # Rich HTML representation: hand the flat graph to the "model-builder" component.
        process = {"process_graph": self.flat_graph()}
        widget_parameters = {"id": uuid.uuid4().hex, "explicit-zoom": True, "height": "400px"}
        return render_component("model-builder", data=process, parameters=widget_parameters)
+
+
class UDF:
    """
    Helper class to load UDF code (e.g. from file) and embed them as "callback" or child process in a process graph.

    Usage example:

    .. code-block:: python

        udf = UDF.from_file("my-udf-code.py")
        cube = cube.apply(process=udf)


    .. versionchanged:: 0.13.0
        Added auto-detection of ``runtime``.
        Specifying the ``data`` argument is not necessary anymore, and actually deprecated.
        Added :py:meth:`from_file` to simplify loading UDF code from a file.
        See :ref:`old_udf_api` for more background about the changes.
    """

    # TODO: eliminate dependency on `openeo.rest.connection` and move to somewhere under `openeo.internal`?

    __slots__ = ["code", "_runtime", "version", "context", "_source"]

    # Regexes to extract a file suffix from a URL source, tried in order:
    # 1. suffix at the end of the part before any query string (`?...`) or fragment (`#...`),
    # 2. legacy pattern: suffix at the very end of the URL, or directly followed by `&`/`#`
    #    (kept as fallback so URLs that were recognised before are still recognised).
    _URL_SUFFIX_PATTERNS = (
        r"https?://[^?#]*(?P<suffix>\.\w+)([?#].*)?$",
        r"https?://.*?(?P<suffix>\.\w+)([&#].*)?$",
    )

    def __init__(
        self,
        code: str,
        runtime: Optional[str] = None,
        data=None,  # TODO #181 remove `data` argument
        version: Optional[str] = None,
        context: Optional[dict] = None,
        _source=None,
    ):
        """
        Construct a UDF object from given code string and other argument related to the ``run_udf`` process.

        :param code: UDF source code string (Python, R, ...)
        :param runtime: optional UDF runtime identifier, will be autodetected from source code if omitted.
        :param data: unused leftover from old API. Don't use this argument, it will be removed in a future release.
        :param version: optional UDF runtime version string
        :param context: optional additional UDF context data
        :param _source: (for internal use) source identifier
        """
        # TODO: automatically dedent code (when literal string) ?
        self.code = code
        self._runtime = runtime
        self.version = version
        self.context = context
        self._source = _source
        if data is not None:
            # TODO #181 remove `data` argument
            warnings.warn(
                f"The `data` argument of `{self.__class__.__name__}` is deprecated, unused and will be removed in a future release.",
                category=UserDeprecationWarning,
                stacklevel=2,
            )

    def __repr__(self):
        return f"<{type(self).__name__} runtime={self._runtime!r} code={str_truncate(self.code, width=200)!r}>"

    def get_runtime(self, connection: Optional[Connection] = None) -> str:
        """
        Get the UDF runtime: the explicitly given one, or else a guess based on the UDF source/code.

        :param connection: optional connection, used to normalize a guessed runtime name
        """
        return self._runtime or self._guess_runtime(connection=connection)

    @classmethod
    def from_file(
        cls,
        path: Union[str, pathlib.Path],
        runtime: Optional[str] = None,
        version: Optional[str] = None,
        context: Optional[dict] = None,
    ) -> UDF:
        """
        Load a UDF from a local file.

        .. seealso::
            :py:meth:`from_url` for loading from a URL.

        :param path: path to the local file with UDF source code
        :param runtime: optional UDF runtime identifier, will be auto-detected from source code if omitted.
        :param version: optional UDF runtime version string
        :param context: optional additional UDF context data
        """
        path = pathlib.Path(path)
        code = path.read_text(encoding="utf-8")
        return cls(
            code=code, runtime=runtime, version=version, context=context, _source=path
        )

    @classmethod
    def from_url(
        cls,
        url: str,
        runtime: Optional[str] = None,
        version: Optional[str] = None,
        context: Optional[dict] = None,
    ) -> UDF:
        """
        Load a UDF from a URL.

        .. seealso::
            :py:meth:`from_file` for loading from a local file.

        :param url: URL path to load the UDF source code from
        :param runtime: optional UDF runtime identifier, will be auto-detected from source code if omitted.
        :param version: optional UDF runtime version string
        :param context: optional additional UDF context data
        """
        # NOTE(review): no timeout is passed here, so this call can block for as long as the server stalls.
        resp = requests.get(url)
        resp.raise_for_status()
        code = resp.text
        return cls(
            code=code, runtime=runtime, version=version, context=context, _source=url
        )

    def _guess_runtime(self, connection: Optional[Connection] = None) -> str:
        """
        Guess UDF runtime from UDF source (path or URL) or, failing that, from the source code itself.

        :param connection: optional connection, used for case-normalization against the runtimes it lists
        :raises OpenEoClientException: when the UDF language could not be detected
        """
        # First, guess UDF language
        language = None
        if isinstance(self._source, pathlib.Path):
            language = self._guess_runtime_from_suffix(self._source.suffix)
        elif isinstance(self._source, str):
            for pattern in self._URL_SUFFIX_PATTERNS:
                url_match = re.match(pattern, self._source)
                if url_match:
                    language = self._guess_runtime_from_suffix(url_match.group("suffix"))
                    if language:
                        break
        if not language:
            # Guess language from UDF code
            if re.search(r"^def [\w0-9_]+\(", self.code, flags=re.MULTILINE):
                language = "Python"
            # TODO: detection heuristics for R and other languages?
        if not language:
            raise OpenEoClientException("Failed to detect language of UDF code.")
        runtime = language
        if connection:
            # Some additional best-effort validation/normalization of the runtime
            # TODO: this just does some case-normalization, just drop that all together to eliminate
            #       the dependency on a connection object. See https://github.com/Open-EO/openeo-api/issues/510
            runtimes = {k.lower(): k for k in connection.list_udf_runtimes().keys()}
            runtime = runtimes.get(runtime.lower(), runtime)
        return runtime

    def _guess_runtime_from_suffix(self, suffix: str) -> Optional[str]:
        """Map a file suffix (case-insensitive, including the dot) to a language name, or None when unknown."""
        return {
            ".py": "Python",
            ".r": "R",
        }.get(suffix.lower())

    def get_run_udf_callback(self, connection: Optional[Connection] = None, data_parameter: str = "data") -> PGNode:
        """
        For internal use: construct `run_udf` node to be used as callback in `apply`, `reduce_dimension`, ...

        :param connection: optional connection, passed on to the runtime detection
        :param data_parameter: name of the parent process parameter to feed to `run_udf` as `data`
        """
        arguments = dict_no_none(
            data={"from_parameter": data_parameter},
            udf=self.code,
            runtime=self.get_runtime(connection=connection),
            version=self.version,
            context=self.context,
        )
        return PGNode(process_id="run_udf", arguments=arguments)
+
def build_child_callback(
    process: Union[str, PGNode, typing.Callable, UDF],
    parent_parameters: List[str],
    connection: Optional[Connection] = None,
) -> dict:
    """
    Build a "callback" process: a user defined process that is used by another process (such
    as `apply`, `apply_dimension`, `reduce`, ....)

    :param process: process id string, PGNode, callable that uses the ProcessBuilder mechanism to build a process,
        :py:class:`UDF` object, or a dict with a PGNode under key ``"process_graph"``
    :param parent_parameters: list of parameter names defined for child process
    :param connection: optional connection object to improve runtime validation for UDFs
    :return: the callback, converted with ``PGNode.to_process_graph_argument``
    :raises ValueError: when ``process`` is of an unsupported type
    """
    # TODO: move this to more generic process graph building utility module
    # TODO: autodetect the parameters defined by parent process?
    # TODO: eliminate need for connection object (also see `UDF._guess_runtime`)
    # TODO: when `openeo.rest` deps are gone: move this helper to somewhere under `openeo.internal`
    if isinstance(process, PGNode):
        # Assume this is already a valid callback process
        pg = process
    elif isinstance(process, str):
        # Assume given reducer is a simple predefined reduce process_id
        # TODO: avoid local import (workaround for circular import issue)
        import openeo.processes
        if process in openeo.processes.__dict__:
            process_params = get_parameter_names(openeo.processes.__dict__[process])
            # TODO: switch to "Callable" handling here
        else:
            # Best effort guess
            process_params = parent_parameters
        if parent_parameters == ["x", "y"] and (len(process_params) == 1 or process_params[:1] == ["data"]):
            # Special case: wrap all parent parameters in an array
            arguments = {process_params[0]: [{"from_parameter": p} for p in parent_parameters]}
        else:
            # Only pass parameters that correspond with an arg name.
            # Iterate `process_params` in order (instead of iterating a set) so the argument order,
            # and thus the serialized process graph, is the same on every run.
            arguments = {p: {"from_parameter": p} for p in process_params if p in parent_parameters}
        pg = PGNode(process_id=process, arguments=arguments)
    elif isinstance(process, typing.Callable):
        pg = convert_callable_to_pgnode(process, parent_parameters=parent_parameters)
    elif isinstance(process, UDF):
        pg = process.get_run_udf_callback(connection=connection, data_parameter=parent_parameters[0])
    elif isinstance(process, dict) and isinstance(process.get("process_graph"), PGNode):
        pg = process["process_graph"]
    else:
        raise ValueError(process)

    return PGNode.to_process_graph_argument(pg)


def _ensure_save_result(
    cube: _ProcessGraphAbstraction,
    *,
    format: Optional[str] = None,
    options: Optional[dict] = None,
    weak_format: Optional[str] = None,
    default_format: str,
    method: str,
) -> _ProcessGraphAbstraction:
    """
    Make sure there is a `save_result` node in the process graph.

    :param cube: cube (process graph abstraction) to check and, if necessary, extend
    :param format: (optional) desired `save_result` file format
    :param options: (optional) desired `save_result` file format parameters
    :param weak_format: (optional) weak format indicator guessed from file name
    :param default_format: default format for data type to use when no format is specified by user
    :param method: description of the calling method, only used in the error message
    :return: the given cube when it already contains a `save_result` node,
        otherwise the result of calling ``save_result`` on it
    :raises OpenEoClientException: when a format or options are given
        while the process graph already contains a `save_result` node
    """
    # TODO #278 instead of standalone helper function, move this to common base class for raster cubes, vector cubes, ...
    save_result_nodes = [n for n in cube.result_node().walk_nodes() if n.process_id == "save_result"]

    if not save_result_nodes:
        # No `save_result` node yet: automatically add it.
        # Format precedence: explicit `format`, then `weak_format`, then `default_format`.
        # TODO: the `save_result` method is not defined on _ProcessGraphAbstraction, but it is on DataCube and VectorCube
        cube = cube.save_result(format=format or weak_format or default_format, options=options)
    elif format or options:
        raise OpenEoClientException(
            f"{method} with explicit output {'format' if format else 'options'} {format or options!r},"
            f" but the process graph already has `save_result` node(s)"
            f" which is ambiguous and should not be combined."
        )

    return cube
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/rest/connection.html b/_modules/openeo/rest/connection.html new file mode 100644 index 000000000..c8124f1c2 --- /dev/null +++ b/_modules/openeo/rest/connection.html @@ -0,0 +1,2376 @@ + + + + + + + openeo.rest.connection — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.rest.connection

+"""
+This module provides a Connection object to manage and persist settings when interacting with the OpenEO API.
+"""
+from __future__ import annotations
+
+import datetime
+import json
+import logging
+import os
+import shlex
+import sys
+import warnings
+from collections import OrderedDict
+from pathlib import Path, PurePosixPath
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    Union,
+)
+
+import requests
+import shapely.geometry.base
+from requests import Response
+from requests.auth import AuthBase, HTTPBasicAuth
+
+import openeo
+from openeo.capabilities import ApiVersionException, ComparableVersion
+from openeo.config import config_log, get_config_option
+from openeo.internal.documentation import openeo_process
+from openeo.internal.graph_building import FlatGraphableMixin, PGNode, as_flat_graph
+from openeo.internal.jupyter import VisualDict, VisualList
+from openeo.internal.processes.builder import ProcessBuilderBase
+from openeo.internal.warnings import deprecated, legacy_alias
+from openeo.metadata import (
+    Band,
+    BandDimension,
+    CollectionMetadata,
+    SpatialDimension,
+    TemporalDimension,
+)
+from openeo.rest import (
+    DEFAULT_DOWNLOAD_CHUNK_SIZE,
+    CapabilitiesException,
+    OpenEoApiError,
+    OpenEoApiPlainError,
+    OpenEoClientException,
+    OpenEoRestError,
+)
+from openeo.rest._datacube import _ProcessGraphAbstraction, build_child_callback
+from openeo.rest.auth.auth import BasicBearerAuth, BearerAuth, NullAuth, OidcBearerAuth
+from openeo.rest.auth.config import AuthConfig, RefreshTokenStore
+from openeo.rest.auth.oidc import (
+    DefaultOidcClientGrant,
+    GrantsChecker,
+    OidcAuthCodePkceAuthenticator,
+    OidcAuthenticator,
+    OidcClientCredentialsAuthenticator,
+    OidcClientInfo,
+    OidcDeviceAuthenticator,
+    OidcException,
+    OidcProviderInfo,
+    OidcRefreshTokenAuthenticator,
+    OidcResourceOwnerPasswordAuthenticator,
+)
+from openeo.rest.datacube import DataCube, InputDate
+from openeo.rest.graph_building import CollectionProperty
+from openeo.rest.job import BatchJob, RESTJob
+from openeo.rest.mlmodel import MlModel
+from openeo.rest.rest_capabilities import RESTCapabilities
+from openeo.rest.service import Service
+from openeo.rest.udp import Parameter, RESTUserDefinedProcess
+from openeo.rest.userfile import UserFile
+from openeo.rest.vectorcube import VectorCube
+from openeo.util import (
+    ContextTimer,
+    LazyLoadCache,
+    dict_no_none,
+    ensure_list,
+    load_json_resource,
+    repr_truncate,
+    rfc3339,
+    str_truncate,
+    url_join,
+)
+
# Module-level logger, named after this module.
_log = logging.getLogger(__name__)

# Default timeouts for requests (in seconds)
# TODO: get default_timeout from config?
DEFAULT_TIMEOUT = 20 * 60  # 20 minutes
DEFAULT_TIMEOUT_SYNCHRONOUS_EXECUTE = 30 * 60  # 30 minutes
+
+
+class RestApiConnection:
+    """Base connection class implementing generic REST API request functionality"""
+
+    def __init__(
+        self,
+        root_url: str,
+        auth: Optional[AuthBase] = None,
+        session: Optional[requests.Session] = None,
+        default_timeout: Optional[int] = None,
+        slow_response_threshold: Optional[float] = None,
+    ):
+        self._root_url = root_url
+        self.auth = auth or NullAuth()
+        self.session = session or requests.Session()
+        self.default_timeout = default_timeout or DEFAULT_TIMEOUT
+        self.default_headers = {
+            "User-Agent": "openeo-python-client/{cv} {py}/{pv} {pl}".format(
+                cv=openeo.client_version(),
+                py=sys.implementation.name, pv=".".join(map(str, sys.version_info[:3])),
+                pl=sys.platform
+            )
+        }
+        self.slow_response_threshold = slow_response_threshold
+
+    @property
+    def root_url(self):
+        return self._root_url
+
+    def build_url(self, path: str):
+        return url_join(self._root_url, path)
+
+    def _merged_headers(self, headers: dict) -> dict:
+        """Merge default headers with given headers"""
+        result = self.default_headers.copy()
+        if headers:
+            result.update(headers)
+        return result
+
+    def _is_external(self, url: str) -> bool:
+        """Check if given url is external (not under root url)"""
+        root = self.root_url.rstrip("/")
+        return not (url == root or url.startswith(root + '/'))
+
+    def request(
+        self,
+        method: str,
+        path: str,
+        *,
+        params: Optional[dict] = None,
+        headers: Optional[dict] = None,
+        auth: Optional[AuthBase] = None,
+        check_error: bool = True,
+        expected_status: Optional[Union[int, Iterable[int]]] = None,
+        **kwargs,
+    ):
+        """Generic request send"""
+        url = self.build_url(path)
+        # Don't send default auth headers to external domains.
+        auth = auth or (self.auth if not self._is_external(url) else None)
+        slow_response_threshold = kwargs.pop("slow_response_threshold", self.slow_response_threshold)
+        if _log.isEnabledFor(logging.DEBUG):
+            _log.debug(
+                "Request `{m} {u}` with params {p}, headers {h}, auth {a}, kwargs {k}".format(
+                    m=method.upper(),
+                    u=url,
+                    p=params,
+                    h=headers and headers.keys(),
+                    a=type(auth).__name__,
+                    k=list(kwargs.keys()),
+                )
+            )
+        with ContextTimer() as timer:
+            resp = self.session.request(
+                method=method,
+                url=url,
+                params=params,
+                headers=self._merged_headers(headers),
+                auth=auth,
+                timeout=kwargs.pop("timeout", self.default_timeout),
+                **kwargs
+            )
+        if slow_response_threshold and timer.elapsed() > slow_response_threshold:
+            _log.warning("Slow response: `{m} {u}` took {e:.2f}s (>{t:.2f}s)".format(
+                m=method.upper(), u=str_truncate(url, width=64),
+                e=timer.elapsed(), t=slow_response_threshold
+            ))
+        if _log.isEnabledFor(logging.DEBUG):
+            _log.debug(
+                f"openEO request `{resp.request.method} {resp.request.path_url}` -> response {resp.status_code} headers {resp.headers!r}"
+            )
+        # Check for API errors and unexpected HTTP status codes as desired.
+        status = resp.status_code
+        expected_status = ensure_list(expected_status) if expected_status else []
+        if check_error and status >= 400 and status not in expected_status:
+            self._raise_api_error(resp)
+        if expected_status and status not in expected_status:
+            raise OpenEoRestError("Got status code {s!r} for `{m} {p}` (expected {e!r}) with body {body}".format(
+                m=method.upper(), p=path, s=status, e=expected_status, body=resp.text)
+            )
+        return resp
+
+    def _raise_api_error(self, response: requests.Response):
+        """Convert API error response to Python exception"""
+        status_code = response.status_code
+        try:
+            info = response.json()
+        except Exception:
+            info = None
+
+        # Valid JSON object with "code" and "message" fields indicates a proper openEO API error.
+        if isinstance(info, dict):
+            error_code = info.get("code")
+            error_message = info.get("message")
+            if error_code and isinstance(error_code, str) and error_message and isinstance(error_message, str):
+                raise OpenEoApiError(
+                    http_status_code=status_code,
+                    code=error_code,
+                    message=error_message,
+                    id=info.get("id"),
+                    url=info.get("url"),
+                )
+
+        # Failed to parse it as a compliant openEO API error: show body as-is in the exception.
+        text = response.text
+        error_message = None
+        _log.warning(f"Failed to parse API error response: [{status_code}] {text!r} (headers: {response.headers})")
+
+        # TODO: eliminate this VITO-backend specific error massaging?
+        if status_code == 502 and "Proxy Error" in text:
+            error_message = (
+                "Received 502 Proxy Error."
+                " This typically happens when a synchronous openEO processing request takes too long and is aborted."
+                " Consider using a batch job instead."
+            )
+
+        raise OpenEoApiPlainError(message=text, http_status_code=status_code, error_message=error_message)
+
+    def get(
+        self,
+        path: str,
+        *,
+        params: Optional[dict] = None,
+        stream: bool = False,
+        auth: Optional[AuthBase] = None,
+        **kwargs,
+    ) -> Response:
+        """
+        Do GET request to REST API.
+
+        :param path: API path (without root url)
+        :param params: Additional query parameters
+        :param stream: True if the get request should be streamed, else False
+        :param auth: optional custom authentication to use instead of the default one
+        :return: response: Response
+        """
+        return self.request("get", path=path, params=params, stream=stream, auth=auth, **kwargs)
+
+    def post(self, path: str, json: Optional[dict] = None, **kwargs) -> Response:
+        """
+        Send a POST request to the REST API, without following redirects.
+
+        :param path: API path (relative to the root url)
+        :param json: data (as dictionary) to post as JSON encoded request body
+        :param kwargs: extra keyword arguments, handed over as-is to :py:meth:`request`
+        :return: the response object
+        """
+        send = self.request
+        return send("post", path=path, json=json, allow_redirects=False, **kwargs)
+
+    def delete(self, path: str, **kwargs) -> Response:
+        """
+        Send a DELETE request to the REST API, without following redirects.
+
+        :param path: API path (relative to the root url)
+        :param kwargs: extra keyword arguments, handed over as-is to :py:meth:`request`
+        :return: the response object
+        """
+        send = self.request
+        return send("delete", path=path, allow_redirects=False, **kwargs)
+
+    def patch(self, path: str, **kwargs) -> Response:
+        """
+        Send a PATCH request to the REST API, without following redirects.
+
+        :param path: API path (relative to the root url)
+        :param kwargs: extra keyword arguments, handed over as-is to :py:meth:`request`
+        :return: the response object
+        """
+        send = self.request
+        return send("patch", path=path, allow_redirects=False, **kwargs)
+
+    def put(self, path: str, headers: Optional[dict] = None, data: Optional[dict] = None, **kwargs) -> Response:
+        """
+        Send a PUT request to the REST API, without following redirects.
+
+        :param path: API path (relative to the root url)
+        :param headers: headers to add to the request
+        :param data: data to add to the request
+        :param kwargs: extra keyword arguments, handed over as-is to :py:meth:`request`
+        :return: the response object
+        """
+        send = self.request
+        return send("put", path=path, data=data, headers=headers, allow_redirects=False, **kwargs)
+
+    def __repr__(self):
+        # Compact description: connection class, root url and the kind of auth in use.
+        class_name = type(self).__name__
+        root_url = self._root_url
+        auth_name = type(self.auth).__name__
+        return "<{c} to {r!r} with {a}>".format(c=class_name, r=root_url, a=auth_name)
+
+
+
+[docs]
+class Connection(RestApiConnection):
+    """
+    Connection to an openEO backend.
+
+    On construction, the given url is used for API version discovery
+    (see :py:meth:`version_discovery`) and the resulting root url is checked
+    against the minimum supported API version.
+
+    :param url: Backend root url. When it contains no scheme ("://"), "https://" is prepended.
+    :param session: Optional ``requests.Session`` object to use for requests.
+    :param default_timeout: Default timeout for requests in seconds.
+    :param auto_validate: toggle to automatically validate process graphs before execution
+    :param slow_response_threshold: Optional threshold in seconds
+        to consider a response as slow and log a warning.
+    :param auth_config: Optional :class:`AuthConfig` object
+        to fetch authentication related configuration from.
+    :param refresh_token_store: For advanced usage:
+        custom :class:`RefreshTokenStore` object
+        to use for storing/loading refresh tokens.
+    :param oidc_auth_renewer: For advanced usage:
+        optional :class:`OidcAuthenticator` object to use for renewing OIDC tokens.
+    :param auth: Optional ``requests.auth.AuthBase`` object to use for requests.
+        Usage of this parameter is deprecated, use the specific authentication methods instead.
+    """
+
+    # Lowest openEO API version this client accepts (used in version discovery and the initial check).
+    _MINIMUM_API_VERSION = ComparableVersion("1.0.0")
+
+    def __init__(
+        self,
+        url: str,
+        *,
+        session: Optional[requests.Session] = None,
+        default_timeout: Optional[int] = None,
+        auto_validate: bool = True,
+        slow_response_threshold: Optional[float] = None,
+        auth_config: Optional[AuthConfig] = None,
+        refresh_token_store: Optional[RefreshTokenStore] = None,
+        oidc_auth_renewer: Optional[OidcAuthenticator] = None,
+        auth: Optional[AuthBase] = None,
+    ):
+        # Default to HTTPS when the user gave a bare host name.
+        if "://" not in url:
+            url = "https://" + url
+        # Keep the url as given (before version discovery):
+        # it is the backend key for auth config lookups and capabilities.
+        self._orig_url = url
+        super().__init__(
+            root_url=self.version_discovery(url, session=session, timeout=default_timeout),
+            auth=auth, session=session, default_timeout=default_timeout,
+            slow_response_threshold=slow_response_threshold,
+        )
+        self._capabilities_cache = LazyLoadCache()
+
+        # Initial API version check.
+        self._api_version.require_at_least(self._MINIMUM_API_VERSION)
+
+        # Auth config and refresh token store may be None here:
+        # they are created lazily by `_get_auth_config` / `_get_refresh_token_store`.
+        self._auth_config = auth_config
+        self._refresh_token_store = refresh_token_store
+        self._oidc_auth_renewer = oidc_auth_renewer
+        self._auto_validate = auto_validate
+
+
+[docs]
+    @classmethod
+    def version_discovery(
+        cls, url: str, session: Optional[requests.Session] = None, timeout: Optional[int] = None
+    ) -> str:
+        """
+        Do automatic openEO API version discovery from given url, using a "well-known URI" strategy.
+
+        Among the versions listed by the backend that satisfy the minimum API version,
+        production versions are preferred and the highest version is picked.
+        Any failure along the way (request failure, unexpected document structure,
+        no supported version) makes this fall back on the given url.
+
+        :param url: initial backend url (not including "/.well-known/openeo")
+        :param session: optional ``requests.Session`` object to use for the discovery request
+        :param timeout: optional timeout (in seconds) for the discovery request
+        :return: root url of highest supported backend version
+        """
+        try:
+            connection = RestApiConnection(url, session=session)
+            well_known_url_response = connection.get("/.well-known/openeo", timeout=timeout)
+            assert well_known_url_response.status_code == 200
+            versions = well_known_url_response.json()["versions"]
+            supported_versions = [v for v in versions if cls._MINIMUM_API_VERSION <= v["api_version"]]
+            assert supported_versions
+            production_versions = [v for v in supported_versions if v.get("production", True)]
+            # Compare as versions, not as plain strings:
+            # lexicographic ordering would rank "1.10.0" below "1.2.0".
+            highest_version = max(
+                production_versions or supported_versions,
+                key=lambda v: ComparableVersion(v["api_version"]),
+            )
+            _log.debug("Highest supported version available in backend: %s", highest_version)
+            return highest_version['url']
+        except Exception:
+            # Be very lenient about failing on the well-known URI strategy.
+            return url
+
+
+    def _get_auth_config(self) -> AuthConfig:
+        # Lazily create a default auth config when none was given at construction time.
+        auth_config = self._auth_config
+        if auth_config is None:
+            auth_config = self._auth_config = AuthConfig()
+        return auth_config
+
+    def _get_refresh_token_store(self) -> RefreshTokenStore:
+        # Lazily create a default refresh token store when none was given at construction time.
+        store = self._refresh_token_store
+        if store is None:
+            store = self._refresh_token_store = RefreshTokenStore()
+        return store
+
+
+[docs]
+    def authenticate_basic(self, username: Optional[str] = None, password: Optional[str] = None) -> Connection:
+        """
+        Authenticate a user to the backend with basic (username and password) authentication.
+
+        When no username is given, username and password are looked up in the auth config.
+
+        :param username: User name
+        :param password: User passphrase
+        :return: this connection, now set up with bearer token authentication
+        :raises OpenEoClientException: when the backend does not support basic authentication
+            or when no username is given or found
+        """
+        basic_auth_supported = self.capabilities().supports_endpoint("/credentials/basic", method="GET")
+        if not basic_auth_supported:
+            raise OpenEoClientException("This openEO back-end does not support basic authentication.")
+
+        if username is None:
+            username, password = self._get_auth_config().get_basic_auth(backend=self._orig_url)
+            if username is None:
+                raise OpenEoClientException("No username/password given or found.")
+
+        # /credentials/basic is the only endpoint that expects a Basic HTTP auth
+        basic_auth = HTTPBasicAuth(username, password)
+        credentials = self.get('/credentials/basic', auth=basic_auth).json()
+        # Switch to bearer based authentication in further requests.
+        self.auth = BasicBearerAuth(access_token=credentials["access_token"])
+        return self
+
+
+    def _get_oidc_provider(
+        self, provider_id: Union[str, None] = None, parse_info: bool = True
+    ) -> Tuple[str, Union[OidcProviderInfo, None]]:
+        """
+        Get provider id and info, based on context.
+        If provider_id is given, verify it against backend's list of providers.
+        If not given, find a suitable provider based on env vars, config or backend's default.
+
+        Resolution order when no provider_id is given:
+        the ``OPENEO_AUTH_PROVIDER_ID`` env var (if it names a listed provider),
+        the single provider listed by the backend,
+        the single provider that is both in the auth config and listed by the backend,
+        and finally the first provider listed by the backend.
+
+        :param provider_id: id of OIDC provider as specified by backend (/credentials/oidc).
+            Can be None if there is just one provider.
+        :param parse_info: whether to parse the provider info into an :py:class:`OidcProviderInfo` object
+            (which involves a ".well-known/openid-configuration" request)
+        :return: resolved/verified provider_id and provider info object (unless ``parse_info`` is False)
+        :raises OpenEoClientException: when the backend lists no providers
+            or when the requested provider is not listed
+        """
+        oidc_info = self.get("/credentials/oidc", expected_status=200).json()
+        # Ordered mapping: the listing order of the backend matters for the final fallback below.
+        providers = OrderedDict((p["id"], p) for p in oidc_info["providers"])
+        if len(providers) < 1:
+            raise OpenEoClientException("Backend lists no OIDC providers.")
+        _log.info("Found OIDC providers: {p}".format(p=list(providers.keys())))
+
+        # TODO: also support specifying provider through issuer URL?
+        provider_id_from_env = os.environ.get("OPENEO_AUTH_PROVIDER_ID")
+
+        if provider_id:
+            # Explicitly requested provider: must be listed by the backend.
+            if provider_id not in providers:
+                raise OpenEoClientException(
+                    "Requested OIDC provider {r!r} not available. Should be one of {p}.".format(
+                        r=provider_id, p=list(providers.keys())
+                    )
+                )
+            provider = providers[provider_id]
+        elif provider_id_from_env and provider_id_from_env in providers:
+            # Note: an env var value that is not listed by the backend is silently skipped.
+            _log.info(f"Using provider_id {provider_id_from_env!r} from OPENEO_AUTH_PROVIDER_ID env var")
+            provider_id = provider_id_from_env
+            provider = providers[provider_id]
+        elif len(providers) == 1:
+            provider_id, provider = providers.popitem()
+            _log.info(
+                f"No OIDC provider given, but only one available: {provider_id!r}. Using that one."
+            )
+        else:
+            # Check if there is a single provider in the config to use.
+            backend = self._orig_url
+            provider_configs = self._get_auth_config().get_oidc_provider_configs(
+                backend=backend
+            )
+            intersection = set(provider_configs.keys()).intersection(providers.keys())
+            if len(intersection) == 1:
+                provider_id = intersection.pop()
+                provider = providers[provider_id]
+                _log.info(
+                    f"No OIDC provider given, but only one in config (for backend {backend!r}): {provider_id!r}. Using that one."
+                )
+            else:
+                # `last=False`: take the first provider in the backend's listing order.
+                provider_id, provider = providers.popitem(last=False)
+                _log.info(
+                    f"No OIDC provider given. Using first provider {provider_id!r} as advertised by backend."
+                )
+
+        provider_info = OidcProviderInfo.from_dict(provider) if parse_info else None
+
+        return provider_id, provider_info
+
+    def _get_oidc_provider_and_client_info(
+        self,
+        provider_id: str,
+        client_id: Union[str, None] = None,
+        client_secret: Union[str, None] = None,
+        default_client_grant_check: Union[None, GrantsChecker] = None,
+    ) -> Tuple[str, OidcClientInfo]:
+        """
+        Resolve provider_id and client info (as given or from config)
+
+        When no client_id is given, it is looked up (together with the client secret) in the auth config,
+        and if that yields nothing and ``default_client_grant_check`` is set,
+        in the default clients of the provider info.
+
+        :param provider_id: id of OIDC provider as specified by backend (/credentials/oidc).
+            Can be None if there is just one provider.
+        :param client_id: client id to use, or None to look it up
+        :param client_secret: client secret to use
+        :param default_client_grant_check: grant check to select a default client from the provider info
+            (no default client lookup when not set)
+
+        :return: OIDC provider id and client info
+        :raises OpenEoClientException: when no client id is given or found
+        """
+        provider_id, provider = self._get_oidc_provider(provider_id)
+
+        if client_id is None:
+            _log.debug("No client_id: checking config for preferred client_id")
+            # Note: this also replaces a given `client_secret` with the one from the config.
+            client_id, client_secret = self._get_auth_config().get_oidc_client_configs(
+                backend=self._orig_url, provider_id=provider_id
+            )
+            if client_id:
+                _log.info("Using client_id {c!r} from config (provider {p!r})".format(c=client_id, p=provider_id))
+        if client_id is None and default_client_grant_check:
+            # Try "default_clients" from backend's provider info.
+            _log.debug("No client_id given: checking default clients in backend's provider info")
+            client_id = provider.get_default_client_id(grant_check=default_client_grant_check)
+            if client_id:
+                _log.info("Using default client_id {c!r} from OIDC provider {p!r} info.".format(
+                    c=client_id, p=provider_id
+                ))
+        if client_id is None:
+            raise OpenEoClientException("No client_id found.")
+
+        client_info = OidcClientInfo(client_id=client_id, client_secret=client_secret, provider=provider)
+
+        return provider_id, client_info
+
+    def _authenticate_oidc(
+        self,
+        authenticator: OidcAuthenticator,
+        *,
+        provider_id: str,
+        store_refresh_token: bool = False,
+        fallback_refresh_token_to_store: Optional[str] = None,
+        oidc_auth_renewer: Optional[OidcAuthenticator] = None,
+    ) -> Connection:
+        """
+        Authenticate through OIDC and set up bearer token (based on OIDC access_token) for further requests.
+
+        :param authenticator: authenticator to get the tokens from
+        :param provider_id: OIDC provider id to use in the bearer token
+        :param store_refresh_token: whether to request a refresh token and put it in the refresh token store
+        :param fallback_refresh_token_to_store: refresh token to store
+            when the authenticator did not produce a new one
+        :param oidc_auth_renewer: authenticator to keep around for renewing the access token later.
+            When not given and a refresh token is stored,
+            a refresh token based authenticator is set up for that.
+        :return: this connection
+        """
+        tokens = authenticator.get_tokens(request_refresh_token=store_refresh_token)
+        # Only log which kinds of tokens were obtained, not the token values.
+        _log.info("Obtained tokens: {t}".format(t=[k for k, v in tokens._asdict().items() if v]))
+        if store_refresh_token:
+            refresh_token = tokens.refresh_token or fallback_refresh_token_to_store
+            if refresh_token:
+                self._get_refresh_token_store().set_refresh_token(
+                    issuer=authenticator.provider_info.issuer,
+                    client_id=authenticator.client_id,
+                    refresh_token=refresh_token
+                )
+                if not oidc_auth_renewer:
+                    oidc_auth_renewer = OidcRefreshTokenAuthenticator(
+                        client_info=authenticator.client_info, refresh_token=refresh_token
+                    )
+            else:
+                _log.warning("No OIDC refresh token to store.")
+        token = tokens.access_token
+        self.auth = OidcBearerAuth(provider_id=provider_id, access_token=token)
+        # Note: this also resets a previously set renewer when none applies now.
+        self._oidc_auth_renewer = oidc_auth_renewer
+        return self
+
+
+[docs]
+    def authenticate_oidc_authorization_code(
+        self,
+        client_id: Optional[str] = None,
+        client_secret: Optional[str] = None,
+        provider_id: Optional[str] = None,
+        timeout: Optional[int] = None,
+        server_address: Optional[Tuple[str, int]] = None,
+        webbrowser_open: Optional[Callable] = None,
+        store_refresh_token=False,
+    ) -> Connection:
+        """
+        Authenticate with the OpenID Connect Authorization Code Flow (with PKCE).
+
+        .. deprecated:: 0.19.0
+            Usage of the Authorization Code flow is deprecated (because of its complexity) and will be removed.
+            It is recommended to use the Device Code flow with :py:meth:`authenticate_oidc_device`
+            or Client Credentials flow with :py:meth:`authenticate_oidc_client_credentials`.
+        """
+        pkce_grants = [DefaultOidcClientGrant.AUTH_CODE_PKCE]
+        provider_id, client_info = self._get_oidc_provider_and_client_info(
+            provider_id=provider_id,
+            client_id=client_id,
+            client_secret=client_secret,
+            default_client_grant_check=pkce_grants,
+        )
+        return self._authenticate_oidc(
+            OidcAuthCodePkceAuthenticator(
+                client_info=client_info,
+                webbrowser_open=webbrowser_open,
+                timeout=timeout,
+                server_address=server_address,
+            ),
+            provider_id=provider_id,
+            store_refresh_token=store_refresh_token,
+        )
+ + +
+[docs]
+    def authenticate_oidc_client_credentials(
+        self,
+        client_id: Optional[str] = None,
+        client_secret: Optional[str] = None,
+        provider_id: Optional[str] = None,
+    ) -> Connection:
+        """
+        Authenticate with :ref:`OIDC Client Credentials flow <authenticate_oidc_client_credentials>`
+
+        Client id, secret and provider id can be specified directly through the available arguments.
+        It is also possible to leave these arguments empty and specify them through
+        environment variables ``OPENEO_AUTH_CLIENT_ID``,
+        ``OPENEO_AUTH_CLIENT_SECRET`` and ``OPENEO_AUTH_PROVIDER_ID`` respectively
+        as discussed in :ref:`authenticate_oidc_client_credentials_env_vars`.
+
+        :param client_id: client id to use
+        :param client_secret: client secret to use
+        :param provider_id: provider id to use
+            Fallback value can be set through environment variable ``OPENEO_AUTH_PROVIDER_ID``.
+        :return: this connection
+
+        .. versionchanged:: 0.18.0 Allow specifying client id, secret and provider id through environment variables.
+        """
+        # TODO: option to get client id/secret from a config file too?
+        env = os.environ
+        # The env vars are only used as a pair, and only when no client id was given explicitly.
+        if client_id is None and "OPENEO_AUTH_CLIENT_ID" in env and "OPENEO_AUTH_CLIENT_SECRET" in env:
+            client_id = env.get("OPENEO_AUTH_CLIENT_ID")
+            client_secret = env.get("OPENEO_AUTH_CLIENT_SECRET")
+            _log.debug(f"Getting client id ({client_id}) and secret from environment")
+
+        provider_id, client_info = self._get_oidc_provider_and_client_info(
+            provider_id=provider_id,
+            client_id=client_id,
+            client_secret=client_secret,
+        )
+        authenticator = OidcClientCredentialsAuthenticator(client_info=client_info)
+        # The authenticator doubles as renewer: new tokens can be obtained with the same client credentials.
+        return self._authenticate_oidc(
+            authenticator,
+            provider_id=provider_id,
+            store_refresh_token=False,
+            oidc_auth_renewer=authenticator,
+        )
+ + +
+[docs]
+    def authenticate_oidc_resource_owner_password_credentials(
+        self,
+        username: str,
+        password: str,
+        client_id: Optional[str] = None,
+        client_secret: Optional[str] = None,
+        provider_id: Optional[str] = None,
+        store_refresh_token: bool = False,
+    ) -> Connection:
+        """
+        Authenticate with the OpenId Connect Resource Owner Password Credentials flow.
+
+        :param username: user name
+        :param password: user password
+        :param client_id: client id to use
+        :param client_secret: client secret to use
+        :param provider_id: provider id to use
+        :param store_refresh_token: whether to store the received refresh token automatically
+        :return: this connection
+        """
+        provider_id, client_info = self._get_oidc_provider_and_client_info(
+            provider_id=provider_id,
+            client_id=client_id,
+            client_secret=client_secret,
+        )
+        # TODO: also get username and password from config?
+        return self._authenticate_oidc(
+            OidcResourceOwnerPasswordAuthenticator(client_info=client_info, username=username, password=password),
+            provider_id=provider_id,
+            store_refresh_token=store_refresh_token,
+        )
+ + +
+[docs]
+    def authenticate_oidc_refresh_token(
+        self,
+        client_id: Optional[str] = None,
+        refresh_token: Optional[str] = None,
+        client_secret: Optional[str] = None,
+        provider_id: Optional[str] = None,
+        *,
+        store_refresh_token: bool = False,
+    ) -> Connection:
+        """
+        Authenticate with :ref:`OIDC Refresh Token flow <authenticate_oidc_client_credentials>`
+
+        :param client_id: client id to use
+        :param refresh_token: refresh token to use.
+            When not given, it is looked up in the refresh token store.
+        :param client_secret: client secret to use
+        :param provider_id: provider id to use.
+            Fallback value can be set through environment variable ``OPENEO_AUTH_PROVIDER_ID``.
+        :param store_refresh_token: whether to store the received refresh token automatically
+        :return: this connection
+        :raises OpenEoClientException: when no refresh token is given or found
+
+        .. versionchanged:: 0.19.0 Support fallback provider id through environment variable ``OPENEO_AUTH_PROVIDER_ID``.
+        """
+        refresh_grants = [DefaultOidcClientGrant.REFRESH_TOKEN]
+        provider_id, client_info = self._get_oidc_provider_and_client_info(
+            provider_id=provider_id,
+            client_id=client_id,
+            client_secret=client_secret,
+            default_client_grant_check=refresh_grants,
+        )
+
+        token = refresh_token
+        if token is None:
+            token = self._get_refresh_token_store().get_refresh_token(
+                issuer=client_info.provider.issuer,
+                client_id=client_info.client_id,
+            )
+        if token is None:
+            raise OpenEoClientException("No refresh token given or found")
+
+        authenticator = OidcRefreshTokenAuthenticator(client_info=client_info, refresh_token=token)
+        return self._authenticate_oidc(
+            authenticator,
+            provider_id=provider_id,
+            store_refresh_token=store_refresh_token,
+            fallback_refresh_token_to_store=token,
+            oidc_auth_renewer=authenticator,
+        )
+ + +
+[docs]
+    def authenticate_oidc_device(
+        self,
+        client_id: Optional[str] = None,
+        client_secret: Optional[str] = None,
+        provider_id: Optional[str] = None,
+        *,
+        store_refresh_token: bool = False,
+        use_pkce: Optional[bool] = None,
+        max_poll_time: float = OidcDeviceAuthenticator.DEFAULT_MAX_POLL_TIME,
+        **kwargs,
+    ) -> Connection:
+        """
+        Authenticate with the :ref:`OIDC Device Code flow <authenticate_oidc_device>`
+
+        :param client_id: client id to use instead of the default one
+        :param client_secret: client secret to use instead of the default one
+        :param provider_id: provider id to use.
+            Fallback value can be set through environment variable ``OPENEO_AUTH_PROVIDER_ID``.
+        :param store_refresh_token: whether to store the received refresh token automatically
+        :param use_pkce: Use PKCE instead of client secret.
+            If not set explicitly to `True` (use PKCE) or `False` (use client secret),
+            it will be attempted to detect the best mode automatically.
+            Note that PKCE for device code is not widely supported among OIDC providers.
+        :param max_poll_time: maximum time in seconds to keep polling for successful authentication.
+        :param kwargs: extra keyword arguments, handed over as-is to :py:class:`OidcDeviceAuthenticator`
+        :return: this connection
+
+        .. versionchanged:: 0.5.1 Add :py:obj:`use_pkce` argument
+        .. versionchanged:: 0.17.0 Add :py:obj:`max_poll_time` argument
+        .. versionchanged:: 0.19.0 Support fallback provider id through environment variable ``OPENEO_AUTH_PROVIDER_ID``.
+        """
+
+        def _supports_device_code(grants) -> bool:
+            # A default client qualifies when it supports device code, with or without PKCE.
+            return (
+                DefaultOidcClientGrant.DEVICE_CODE in grants
+                or DefaultOidcClientGrant.DEVICE_CODE_PKCE in grants
+            )
+
+        provider_id, client_info = self._get_oidc_provider_and_client_info(
+            provider_id=provider_id,
+            client_id=client_id,
+            client_secret=client_secret,
+            default_client_grant_check=_supports_device_code,
+        )
+        return self._authenticate_oidc(
+            OidcDeviceAuthenticator(
+                client_info=client_info, use_pkce=use_pkce, max_poll_time=max_poll_time, **kwargs
+            ),
+            provider_id=provider_id,
+            store_refresh_token=store_refresh_token,
+        )
+ + +
+[docs]
+    def authenticate_oidc(
+        self,
+        provider_id: Optional[str] = None,
+        client_id: Optional[str] = None,
+        client_secret: Optional[str] = None,
+        *,
+        store_refresh_token: bool = True,
+        use_pkce: Optional[bool] = None,
+        display: Callable[[str], None] = print,
+        max_poll_time: float = OidcDeviceAuthenticator.DEFAULT_MAX_POLL_TIME,
+    ):
+        """
+        Generic method to do OpenID Connect authentication.
+
+        In the context of interactive usage, this method first tries to use refresh tokens
+        and falls back on device code flow.
+
+        For non-interactive, machine-to-machine contexts, it is also possible to trigger
+        the usage of the "client_credentials" flow through environment variables.
+        Assuming you have set up a OIDC client (with a secret):
+        set ``OPENEO_AUTH_METHOD`` to ``client_credentials``,
+        set ``OPENEO_AUTH_CLIENT_ID`` to the client id,
+        and set ``OPENEO_AUTH_CLIENT_SECRET`` to the client secret.
+
+        See :ref:`authenticate_oidc_automatic` for more details.
+
+        :param provider_id: provider id to use
+        :param client_id: client id to use
+        :param client_secret: client secret to use
+        :param store_refresh_token: whether to store the received refresh token automatically
+        :param use_pkce: passed on to the device code authenticator
+            (only relevant when falling back on the device code flow)
+        :param display: callable passed on to the device code authenticator.
+            Note that the final "Authenticated using ..." messages are printed with ``print``, not with this callable.
+        :param max_poll_time: maximum time in seconds to keep polling for successful authentication.
+        :return: this connection
+        :raises ValueError: when ``OPENEO_AUTH_METHOD`` is set to something else than ``client_credentials``
+
+        .. versionadded:: 0.6.0
+        .. versionchanged:: 0.17.0 Add :py:obj:`max_poll_time` argument
+        .. versionchanged:: 0.18.0 Add support for client credentials flow.
+        """
+        # TODO: unify `os.environ.get` with `get_config_option`?
+        # TODO also support OPENEO_AUTH_CLIENT_ID, ... env vars for refresh token and device code auth?
+
+        # Explicit auth method from environment takes precedence over the interactive flows below.
+        auth_method = os.environ.get("OPENEO_AUTH_METHOD")
+        if auth_method == "client_credentials":
+            _log.debug("authenticate_oidc: going for 'client_credentials' authentication")
+            return self.authenticate_oidc_client_credentials(
+                client_id=client_id, client_secret=client_secret, provider_id=provider_id
+            )
+        elif auth_method:
+            raise ValueError(f"Unhandled auth method {auth_method}")
+
+        _g = DefaultOidcClientGrant  # alias for compactness
+        # A default client must support both flows tried below: refresh token and device code (optionally with PKCE).
+        provider_id, client_info = self._get_oidc_provider_and_client_info(
+            provider_id=provider_id, client_id=client_id, client_secret=client_secret,
+            default_client_grant_check=lambda grants: (
+                _g.REFRESH_TOKEN in grants and (_g.DEVICE_CODE in grants or _g.DEVICE_CODE_PKCE in grants)
+            )
+        )
+
+        # Try refresh token first.
+        refresh_token = self._get_refresh_token_store().get_refresh_token(
+            issuer=client_info.provider.issuer,
+            client_id=client_info.client_id
+        )
+        if refresh_token:
+            try:
+                _log.info("Found refresh token: trying refresh token based authentication.")
+                authenticator = OidcRefreshTokenAuthenticator(client_info=client_info, refresh_token=refresh_token)
+                con = self._authenticate_oidc(
+                    authenticator,
+                    provider_id=provider_id,
+                    store_refresh_token=store_refresh_token,
+                    fallback_refresh_token_to_store=refresh_token,
+                )
+                # TODO: pluggable/jupyter-aware display function?
+                print("Authenticated using refresh token.")
+                return con
+            except OidcException as e:
+                # Not fatal: continue with the device code flow below.
+                _log.info("Refresh token based authentication failed: {e}.".format(e=e))
+
+        # Fall back on device code flow
+        # TODO: make it possible to do other fallback flows too?
+        _log.info("Trying device code flow.")
+        authenticator = OidcDeviceAuthenticator(
+            client_info=client_info, use_pkce=use_pkce, display=display, max_poll_time=max_poll_time
+        )
+        con = self._authenticate_oidc(
+            authenticator,
+            provider_id=provider_id,
+            store_refresh_token=store_refresh_token,
+        )
+        print("Authenticated using device code flow.")
+        return con
+ + +
+[docs]
+    def authenticate_oidc_access_token(self, access_token: str, provider_id: Optional[str] = None) -> Connection:
+        """
+        Set up authorization headers directly with an OIDC access token.
+
+        :py:class:`Connection` provides multiple methods to handle various OIDC authentication flows end-to-end.
+        If you already obtained a valid OIDC access token in another "out-of-band" way, you can use this method to
+        set up the authorization headers appropriately.
+
+        :param access_token: OIDC access token
+        :param provider_id: id of the OIDC provider as listed by the openEO backend (``/credentials/oidc``).
+            If not specified, a provider is resolved from the backend's provider listing.
+        :return: this connection
+
+        .. versionadded:: 0.31.0
+
+        .. versionchanged:: 0.33.0
+            Return connection object to support chaining.
+        """
+        # Resolve/verify the provider id against the backend's listing, without parsing the provider info.
+        resolved_provider_id, _unused_info = self._get_oidc_provider(provider_id=provider_id, parse_info=False)
+        bearer_auth = OidcBearerAuth(provider_id=resolved_provider_id, access_token=access_token)
+        self.auth = bearer_auth
+        # An out-of-band token comes without means to renew it.
+        self._oidc_auth_renewer = None
+        return self
+ + +
+[docs]
+    def request(
+        self,
+        method: str,
+        path: str,
+        headers: Optional[dict] = None,
+        auth: Optional[AuthBase] = None,
+        check_error: bool = True,
+        expected_status: Optional[Union[int, Iterable[int]]] = None,
+        **kwargs,
+    ):
+        """
+        Do a request through the parent class implementation,
+        with a single retry when the OIDC access token turns out to be expired and can be renewed.
+
+        The retry only happens when the failure is a 401/403 ``TokenInvalid`` API error,
+        the current auth is OIDC bearer based and an OIDC auth renewer is available.
+        In all other cases (including a failed renewal) the original API error is re-raised.
+
+        All arguments are handed over as-is to the parent class implementation.
+        """
+        # Do request, but with retry when access token has expired and refresh token is available.
+        def _request():
+            # Explicit `super(Connection, self)`: zero-argument `super()` does not work in this nested function.
+            return super(Connection, self).request(
+                method=method, path=path, headers=headers, auth=auth,
+                check_error=check_error, expected_status=expected_status, **kwargs,
+            )
+
+        try:
+            # Initial request attempt
+            return _request()
+        except OpenEoApiError as api_exc:
+            if api_exc.http_status_code in {401, 403} and api_exc.code == "TokenInvalid":
+                # Auth token expired: can we refresh?
+                if isinstance(self.auth, OidcBearerAuth) and self._oidc_auth_renewer:
+                    msg = f"OIDC access token expired ({api_exc.http_status_code} {api_exc.code})."
+                    try:
+                        self._authenticate_oidc(
+                            authenticator=self._oidc_auth_renewer,
+                            provider_id=self._oidc_auth_renewer.provider_info.id,
+                            store_refresh_token=False,
+                            oidc_auth_renewer=self._oidc_auth_renewer,
+                        )
+                        _log.info(f"{msg} Obtained new access token (grant {self._oidc_auth_renewer.grant_type!r}).")
+                    except OpenEoClientException as auth_exc:
+                        # Renewal failed: log it and fall through to re-raising the original API error.
+                        _log.error(
+                            f"{msg} Failed to obtain new access token (grant {self._oidc_auth_renewer.grant_type!r}): {auth_exc!r}."
+                        )
+                    else:
+                        # Retry request.
+                        return _request()
+            # Not recoverable here: propagate the original API error.
+            raise
+ + +
+[docs]
+    def describe_account(self) -> dict:
+        """
+        Get the description of the currently authenticated user account
+        (the JSON body of the ``/me`` response).
+        """
+        response = self.get('/me', expected_status=200)
+        return response.json()
+ + +
+[docs]
+    @deprecated("use :py:meth:`list_jobs` instead", version="0.4.10")
+    def user_jobs(self) -> List[dict]:
+        # Deprecated alias: just delegates to `list_jobs`.
+        jobs = self.list_jobs()
+        return jobs
+ + +
+[docs]
+    def list_collections(self) -> List[dict]:
+        """
+        List basic metadata of all collections provided by the back-end.
+
+        .. caution::
+
+            Only the basic collection metadata will be returned.
+            To obtain full metadata of a particular collection,
+            it is recommended to use :py:meth:`~openeo.rest.connection.Connection.describe_collection` instead.
+
+        :return: list of dictionaries with basic collection metadata.
+        """
+        # TODO: add caching #383
+        listing = self.get('/collections', expected_status=200).json()
+        collections = listing["collections"]
+        return VisualList("collections", data=collections)
+ + +
+[docs]
+    def list_collection_ids(self) -> List[str]:
+        """
+        List the ids of all collections provided by the back-end.
+
+        .. seealso::
+
+            :py:meth:`~openeo.rest.connection.Connection.describe_collection`
+            to get the metadata of a particular collection.
+
+        :return: list of collection ids
+        """
+        collection_ids = []
+        for metadata in self.list_collections():
+            # Collection entries without an "id" field are skipped.
+            if 'id' in metadata:
+                collection_ids.append(metadata['id'])
+        return collection_ids
+ + +
+[docs]
+    def capabilities(self) -> RESTCapabilities:
+        """
+        Get the capabilities of the back-end
+        (loaded from the root endpoint through the capabilities cache).
+        """
+
+        def _load_capabilities() -> RESTCapabilities:
+            document = self.get('/', expected_status=200).json()
+            return RESTCapabilities(data=document, url=self._orig_url)
+
+        return self._capabilities_cache.get("capabilities", load=_load_capabilities)
+
+
+    def list_input_formats(self) -> dict:
+        # "input" section of the file formats listing (empty dict when absent).
+        file_formats = self.list_file_formats()
+        return file_formats.get("input", {})
+
+    def list_output_formats(self) -> dict:
+        # "output" section of the file formats listing (empty dict when absent).
+        file_formats = self.list_file_formats()
+        return file_formats.get("output", {})
+
+    list_file_types = legacy_alias(
+        list_output_formats,
+        "list_file_types",
+        since="0.4.6",
+    )
+
+
+[docs]
+    def list_file_formats(self) -> dict:
+        """
+        Get the available input and output file formats
+        (loaded from ``/file_formats`` through the capabilities cache).
+        """
+
+        def _load_file_formats() -> dict:
+            return self.get('/file_formats', expected_status=200).json()
+
+        file_formats = self._capabilities_cache.get(key="file_formats", load=_load_file_formats)
+        return VisualDict("file-formats", data=file_formats)
+ + +
+[docs]
+    def list_service_types(self) -> dict:
+        """
+        Get all available service types
+        (loaded from ``/service_types`` through the capabilities cache).
+
+        :return: dictionary with all available service types
+        """
+
+        def _load_service_types() -> dict:
+            return self.get('/service_types', expected_status=200).json()
+
+        service_types = self._capabilities_cache.get(key="service_types", load=_load_service_types)
+        return VisualDict("service-types", data=service_types)
+ + +
+[docs]
+    def list_udf_runtimes(self) -> dict:
+        """
+        List information about the available UDF runtimes
+        (loaded from ``/udf_runtimes`` through the capabilities cache).
+
+        :return: A dictionary with metadata about each available UDF runtime.
+        """
+
+        def _load_udf_runtimes() -> dict:
+            return self.get('/udf_runtimes', expected_status=200).json()
+
+        udf_runtimes = self._capabilities_cache.get(key="udf_runtimes", load=_load_udf_runtimes)
+        return VisualDict("udf-runtimes", data=udf_runtimes)
+ + +
+[docs]
+    def list_services(self) -> dict:
+        """
+        List all services of the authenticated user.
+
+        :return: listing of the user's services (the "services" field of the ``/services`` response)
+        """
+        # TODO return parsed service objects
+        listing = self.get('/services', expected_status=200).json()
+        return VisualList(
+            "data-table",
+            data=listing["services"],
+            parameters={'columns': 'services'},
+        )
+ + +
+[docs]
+    def describe_collection(self, collection_id: str) -> dict:
+        """
+        Get the full metadata of the collection with given id.
+
+        .. seealso::
+
+            :py:meth:`~openeo.rest.connection.Connection.list_collection_ids`
+            to list all collection ids provided by the back-end.
+
+        :param collection_id: collection id
+        :return: collection metadata.
+        """
+        # TODO: duplication with `Connection.collection_metadata`: deprecate one or the other?
+        # TODO: add caching #383
+        response = self.get(f"/collections/{collection_id}", expected_status=200)
+        return VisualDict("collection", data=response.json())
+ + +
+[docs]
+    def collection_items(
+        self,
+        name,
+        spatial_extent: Optional[List[float]] = None,
+        temporal_extent: Optional[List[Union[str, datetime.datetime]]] = None,
+        limit: Optional[int] = None,
+    ) -> Iterator[dict]:
+        """
+        Loads items for a specific image collection.
+        May not be available for all collections.
+
+        This is an experimental API and is subject to change.
+
+        :param name: String Id of the collection
+        :param spatial_extent: Limits the items to the given bounding box in WGS84:
+            1. Lower left corner, coordinate axis 1
+            2. Lower left corner, coordinate axis 2
+            3. Upper right corner, coordinate axis 1
+            4. Upper right corner, coordinate axis 2
+
+        :param temporal_extent: Limits the items to the specified temporal interval.
+            The interval has to be specified as an array with exactly two elements (start, end).
+            Also supports open intervals by setting one of the boundaries to None, but never both.
+        :param limit: The amount of items per request/page. If None, the back-end decides.
+
+        :return: iterator over the pages of items
+        """
+        url = '/collections/{}/items'.format(name)
+
+        # Build the query parameters: only add what is actually specified.
+        params = {}
+        if spatial_extent:
+            coordinates = [str(c) for c in spatial_extent]
+            params["bbox"] = ",".join(coordinates)
+        if temporal_extent:
+            # Open interval boundaries (None) are encoded as "..".
+            boundaries = [".." if t is None else rfc3339.normalize(t) for t in temporal_extent]
+            params["datetime"] = "/".join(boundaries)
+        if limit is not None and limit > 0:
+            params['limit'] = limit
+
+        def _wrap_page(response, page):
+            heading = 'Page {} - Items'.format(page)
+            return VisualDict("items", data=response, parameters={'show-map': True, 'heading': heading})
+
+        return paginate(self, url, params, _wrap_page)
+
+
+    def collection_metadata(self, name) -> CollectionMetadata:
+        # TODO: duplication with `Connection.describe_collection`: deprecate one or the other?
+        # Wrap the collection description in a `CollectionMetadata` object.
+        description = self.describe_collection(name)
+        return CollectionMetadata(metadata=description)
+
+
+[docs]
+    def list_processes(self, namespace: Optional[str] = None) -> List[dict]:
+        # TODO: Maybe format the result dictionary so that the process_id is the key of the dictionary.
+        """
+        List all available processes of the back end.
+
+        :param namespace: The namespace for which to list processes.
+            When not given, the (cached) process listing of the back end itself is used.
+
+        :return: list of process definitions
+        """
+        if namespace is not None:
+            # Namespaced listings are requested each time (no caching).
+            processes = self.get('/processes/' + namespace, expected_status=200).json()["processes"]
+        else:
+
+            def _load_backend_processes():
+                return self.get('/processes', expected_status=200).json()["processes"]
+
+            processes = self._capabilities_cache.get(key=("processes", "backend"), load=_load_backend_processes)
+        display_options = {'show-graph': True, 'provide-download': False}
+        return VisualList("processes", data=processes, parameters=display_options)
+ + +
+[docs]
+    def describe_process(self, id: str, namespace: Optional[str] = None) -> dict:
+        """
+        Get the definition of a single process from the back end.
+
+        :param id: The id of the process.
+        :param namespace: The namespace of the process.
+
+        :return: The process definition.
+        :raises OpenEoClientException: when the process listing contains no process with given id
+        """
+        candidates = (p for p in self.list_processes(namespace) if p["id"] == id)
+        # First match wins.
+        process = next(candidates, None)
+        if process is None:
+            raise OpenEoClientException("Process does not exist.")
+        return VisualDict("process", data=process, parameters={'show-graph': True, 'provide-download': False})
+ + +
def list_jobs(self, limit: Union[int, None] = None) -> List[dict]:
    """
    List all jobs of the authenticated user.

    :param limit: maximum number of jobs to return. Setting this limit enables pagination.

    :return: the ``"jobs"`` listing of the response, wrapped in a ``VisualList``.

    .. versionadded:: 0.36.0
        Added ``limit`` argument
    """
    # TODO: Parse the result so that Job classes returned?
    payload = self.get("/jobs", params={"limit": limit}, expected_status=200).json()
    missing = payload.get("federation:missing")
    if missing:
        # Listing is incomplete: report which federation components did not answer.
        message = "Partial user job listing due to missing federation components: {c}".format(
            c=",".join(missing)
        )
        _log.warning(message)
    # TODO: when pagination is enabled: how to expose link to next page?
    return VisualList("data-table", data=payload["jobs"], parameters={'columns': 'jobs'})
+ + +
def assert_user_defined_process_support(self):
    """
    Verify, based on the capabilities document, that the back-end supports user-defined processes.

    :raises CapabilitiesException: when the ``/process_graphs`` endpoint is not supported.

    .. versionadded:: 0.23.0
    """
    supported = self.capabilities().supports_endpoint("/process_graphs")
    if supported:
        return
    raise CapabilitiesException("Backend does not support user-defined processes.")
+ + +
def save_user_defined_process(
    self,
    user_defined_process_id: str,
    process_graph: Union[dict, ProcessBuilderBase],
    parameters: Optional[List[Union[dict, Parameter]]] = None,
    public: bool = False,
    summary: Optional[str] = None,
    description: Optional[str] = None,
    returns: Optional[dict] = None,
    categories: Optional[List[str]] = None,
    examples: Optional[List[dict]] = None,
    links: Optional[List[dict]] = None,
) -> RESTUserDefinedProcess:
    """
    Store a process graph and its metadata on the backend as a user-defined process for the authenticated user.

    A warning is issued (but the process is still stored) when the id collides with
    a process listed by :py:meth:`list_processes`, or when no parameters are given.

    :param user_defined_process_id: unique identifier for the user-defined process
    :param process_graph: a process graph
    :param parameters: a list of parameters
    :param public: visible to other users?
    :param summary: A short summary of what the process does.
    :param description: Detailed description to explain the entity. CommonMark 0.29 syntax MAY be used for rich text representation.
    :param returns: Description and schema of the return value.
    :param categories: A list of categories.
    :param examples: A list of examples.
    :param links: A list of links.
    :return: a RESTUserDefinedProcess instance
    :raises CapabilitiesException: when the back-end does not support user-defined processes
        (see :py:meth:`assert_user_defined_process_support`).
    """
    self.assert_user_defined_process_support()
    # Short-circuiting scan: no need to materialize a set for a single membership test.
    if any(p["id"] == user_defined_process_id for p in self.list_processes()):
        warnings.warn("Defining user-defined process {u!r} with same id as a pre-defined process".format(
            u=user_defined_process_id))
    if not parameters:
        warnings.warn("Defining user-defined process {u!r} without parameters".format(u=user_defined_process_id))
    udp = RESTUserDefinedProcess(user_defined_process_id=user_defined_process_id, connection=self)
    udp.store(
        process_graph=process_graph, parameters=parameters, public=public,
        summary=summary, description=description,
        returns=returns, categories=categories, examples=examples, links=links
    )
    return udp
+ + +
def list_user_defined_processes(self) -> List[dict]:
    """
    List all user-defined processes of the authenticated user.

    :return: the ``"processes"`` listing of ``GET /process_graphs``, wrapped in a ``VisualList``.
    :raises CapabilitiesException: when the back-end does not support user-defined processes
        (see :py:meth:`assert_user_defined_process_support`).
    """
    self.assert_user_defined_process_support()
    response = self.get("/process_graphs", expected_status=200)
    listing = response.json()["processes"]
    return VisualList("processes", data=listing, parameters={'show-graph': True, 'provide-download': False})
+ + +
def user_defined_process(self, user_defined_process_id: str) -> RESTUserDefinedProcess:
    """
    Get a handle to the user-defined process with the given id.
    The process with the given id should already exist: no request is made here to check that.

    :param user_defined_process_id: the id of the user-defined process
    :return: a RESTUserDefinedProcess instance bound to this connection
    """
    handle = RESTUserDefinedProcess(
        user_defined_process_id=user_defined_process_id,
        connection=self,
    )
    return handle
+ + +
def validate_process_graph(
    self, process_graph: Union[dict, FlatGraphableMixin, str, Path, List[FlatGraphableMixin]]
) -> List[dict]:
    """
    Validate a process graph without executing it.

    :param process_graph: openEO-style (flat) process graph representation,
        or an object that can be converted to such a representation:
        a dictionary, a :py:class:`~openeo.rest.datacube.DataCube` object,
        a string with a JSON representation,
        a local file path or URL to a JSON representation,
        a :py:class:`~openeo.rest.multiresult.MultiResult` object, ...

    :return: list of errors (dictionaries with "code" and "message" fields)
    """
    # Only the "process" part of the request payload is posted to the validation endpoint.
    request = self._build_request_with_process_graph(process_graph)
    response = self.post(path="/validation", json=request["process"], expected_status=200)
    return response.json()["errors"]
@property
def _api_version(self) -> ComparableVersion:
    """API version of the back-end, read from ``api_version_check`` of the capabilities."""
    # TODO make this a public property (it's also useful outside the Connection class)
    capabilities = self.capabilities()
    return capabilities.api_version_check
def vectorcube_from_paths(
    self, paths: List[str], format: str, options: Optional[dict] = None
) -> VectorCube:
    """
    Loads one or more files referenced by url or path that is accessible by the backend.

    :param paths: The files to read.
    :param format: The file format to read from. It must be one of the values that the server reports as supported input file formats.
    :param options: The file format parameters to be used to read the files. Must correspond to the parameters that the server reports as supported parameters for the chosen format.
        Defaults to an empty dictionary.

    :return: A :py:class:`VectorCube`.

    .. versionadded:: 0.14.0
    """
    # A fresh dict per call: a shared mutable default would end up referenced
    # by the graph node of every cube built without explicit options.
    if options is None:
        options = {}
    # TODO #457 deprecate this in favor of `load_url` and standard support for `load_uploaded_files`
    graph = PGNode(
        "load_uploaded_files",
        arguments=dict(paths=paths, format=format, options=options),
    )
    # TODO: load_uploaded_files might also return a raster data cube. Determine this based on format?
    return VectorCube(graph=graph, connection=self)
+ + +
def datacube_from_process(self, process_id: str, namespace: Optional[str] = None, **kwargs) -> DataCube:
    """
    Load a data cube from a (custom) process.

    :param process_id: The process id.
    :param namespace: optional: process namespace
    :param kwargs: The arguments of the custom process
    :return: A :py:class:`DataCube`, without valid metadata, as the client is not aware of this custom process.
    """
    # Single-node process graph: the process call itself, with the keyword arguments as process arguments.
    node = PGNode(process_id, namespace=namespace, arguments=kwargs)
    cube = DataCube(graph=node, connection=self)
    return cube
+ + +
def datacube_from_flat_graph(self, flat_graph: dict, parameters: Optional[dict] = None) -> DataCube:
    """
    Construct a :py:class:`DataCube` from a flat dictionary representation of a process graph.

    .. seealso:: :ref:`datacube_from_json`, :py:meth:`~openeo.rest.connection.Connection.datacube_from_json`

    :param flat_graph: flat dictionary representation of a process graph
        or a process dictionary with such a flat process graph under a "process_graph" field
        (and optionally parameter metadata under a "parameters" field).
    :param parameters: Optional dictionary mapping parameter names to parameter values
        to use for parameters occurring in the process graph (e.g. as used in user-defined processes).
        The given dictionary is not modified.
    :return: A :py:class:`DataCube` corresponding with the operations encoded in the process graph
    """
    # Work on a copy: declared defaults are merged in below and must not leak
    # into the dictionary owned by the caller.
    parameters = dict(parameters) if parameters else {}

    if "process_graph" in flat_graph:
        # `flat_graph` is a "process" structure
        # Extract defaults from declared parameters (explicitly given values win).
        for param in flat_graph.get("parameters") or []:
            if "default" in param:
                parameters.setdefault(param["name"], param["default"])

        flat_graph = flat_graph["process_graph"]

    pgnode = PGNode.from_flat_graph(flat_graph=flat_graph, parameters=parameters)
    return DataCube(graph=pgnode, connection=self)
+ + +
def datacube_from_json(self, src: Union[str, Path], parameters: Optional[dict] = None) -> DataCube:
    """
    Construct a :py:class:`DataCube` from JSON resource containing (flat) process graph representation.

    The resource is loaded with ``load_json_resource`` and handed to
    :py:meth:`datacube_from_flat_graph`.

    .. seealso:: :ref:`datacube_from_json`, :py:meth:`~openeo.rest.connection.Connection.datacube_from_flat_graph`

    :param src: raw JSON string, URL to JSON resource or path to local JSON file
    :param parameters: Optional dictionary mapping parameter names to parameter values
        to use for parameters occurring in the process graph (e.g. as used in user-defined processes)
    :return: A :py:class:`DataCube` corresponding with the operations encoded in the process graph
    """
    flat_graph = load_json_resource(src)
    return self.datacube_from_flat_graph(flat_graph, parameters=parameters)
+ + +
@openeo_process
def load_collection(
    self,
    collection_id: Union[str, Parameter],
    spatial_extent: Union[Dict[str, float], Parameter, None] = None,
    temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
    bands: Union[None, List[str], Parameter] = None,
    properties: Union[
        None, Dict[str, Union[str, PGNode, Callable]], List[CollectionProperty], CollectionProperty
    ] = None,
    max_cloud_cover: Optional[float] = None,
    fetch_metadata: bool = True,
) -> DataCube:
    """
    Load a DataCube by collection id.

    Thin wrapper: all arguments are forwarded to ``DataCube.load_collection``
    with this connection.

    :param collection_id: image collection identifier
    :param spatial_extent: limit data to specified bounding box or polygons
    :param temporal_extent: limit data to specified temporal interval.
        Typically, just a two-item list or tuple containing start and end date.
        See :ref:`filtering-on-temporal-extent-section` for more details on temporal extent handling and shorthand notation.
    :param bands: only add the specified bands.
    :param properties: limit data by collection metadata property predicates.
        See :py:func:`~openeo.rest.graph_building.collection_property` for easy construction of such predicates.
    :param max_cloud_cover: shortcut to set maximum cloud cover ("eo:cloud_cover" collection property)
    :param fetch_metadata: forwarded unchanged to ``DataCube.load_collection``
        (presumably toggles fetching of collection metadata; see that method).
    :return: a datacube containing the requested data

    .. versionadded:: 0.13.0
        added the ``max_cloud_cover`` argument.

    .. versionchanged:: 0.23.0
        Argument ``temporal_extent``: add support for year/month shorthand notation
        as discussed at :ref:`date-shorthand-handling`.

    .. versionchanged:: 0.26.0
        Add :py:func:`~openeo.rest.graph_building.collection_property` support to ``properties`` argument.
    """
    cube = DataCube.load_collection(
        connection=self,
        collection_id=collection_id,
        spatial_extent=spatial_extent,
        temporal_extent=temporal_extent,
        bands=bands,
        properties=properties,
        max_cloud_cover=max_cloud_cover,
        fetch_metadata=fetch_metadata,
    )
    return cube
# Legacy name for `load_collection`, kept as an alias created through `legacy_alias`
# (marked as legacy since version 0.4.10).
# NOTE(review): presumably `legacy_alias` emits a deprecation warning on use; confirm in its definition.
# TODO: remove this #100 #134 0.4.10
imagecollection = legacy_alias(
    load_collection, name="imagecollection", since="0.4.10"
)
@openeo_process
def load_result(
    self,
    id: str,
    spatial_extent: Optional[Dict[str, float]] = None,
    temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
    bands: Optional[List[str]] = None,
) -> DataCube:
    """
    Loads batch job results by job id from the server-side user workspace.
    The job must have been stored by the authenticated user on the back-end currently connected to.

    :param id: The id of a batch job with results.
    :param spatial_extent: limit data to specified bounding box or polygons
    :param temporal_extent: limit data to specified temporal interval.
        Typically, just a two-item list or tuple containing start and end date.
        See :ref:`filtering-on-temporal-extent-section` for more details on temporal extent handling and shorthand notation.
    :param bands: only add the specified bands

    :return: a :py:class:`DataCube`

    .. versionchanged:: 0.23.0
        Argument ``temporal_extent``: add support for year/month shorthand notation
        as discussed at :ref:`date-shorthand-handling`.
    """
    # TODO: add check that back-end supports `load_result` process?
    if temporal_extent:
        # Only a non-empty extent is normalized; empty/None values are passed on as given.
        temporal_extent = DataCube._get_temporal_extent(extent=temporal_extent)
    # Arguments that are None are left out of the process call.
    optional_arguments = dict_no_none(
        spatial_extent=spatial_extent,
        temporal_extent=temporal_extent,
        bands=bands,
    )
    return self.datacube_from_process(process_id="load_result", id=id, **optional_arguments)
+ + +
@openeo_process
def load_stac(
    self,
    url: str,
    spatial_extent: Union[Dict[str, float], Parameter, None] = None,
    temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
    bands: Optional[List[str]] = None,
    properties: Optional[Dict[str, Union[str, PGNode, Callable]]] = None,
) -> DataCube:
    """
    Loads data from a static STAC catalog or a STAC API Collection and returns the data as a processable :py:class:`DataCube`.
    A batch job result can be loaded by providing a reference to it.

    Thin wrapper: all arguments are forwarded to ``DataCube.load_stac`` with this connection.

    If supported by the underlying metadata and file format, the data that is added to the data cube can be
    restricted with the parameters ``spatial_extent``, ``temporal_extent`` and ``bands``.
    If no data is available for the given extents, a ``NoDataAvailable`` error is thrown.

    Remarks:

    * The bands (and all dimensions that specify nominal dimension labels) are expected to be ordered as
      specified in the metadata if the ``bands`` parameter is set to ``null``.
    * If no additional parameter is specified this would imply that the whole data set is expected to be loaded.
      Due to the large size of many data sets, this is not recommended and may be optimized by back-ends to only
      load the data that is actually required after evaluating subsequent processes such as filters.
      This means that the values should be processed only after the data has been limited to the required extent
      and as a consequence also to a manageable size.


    :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog)
        or a specific STAC API Collection that allows to filter items and to download assets.
        This includes batch job results, which itself are compliant to STAC.
        For external URLs, authentication details such as API keys or tokens may need to be included in the URL.

        Batch job results can be specified in two ways:

        - For Batch job results at the same back-end, a URL pointing to the corresponding batch job results
          endpoint should be provided. The URL usually ends with ``/jobs/{id}/results`` and ``{id}``
          is the corresponding batch job ID.
        - For external results, a signed URL must be provided. Not all back-ends support signed URLs,
          which are provided as a link with the link relation `canonical` in the batch job result metadata.
    :param spatial_extent:
        Limits the data to load to the specified bounding box or polygons.

        For raster data, the process loads the pixel into the data cube if the point at the pixel center intersects
        with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).

        For vector data, the process loads the geometry into the data cube if the geometry is fully within the
        bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).
        Empty geometries may only be in the data cube if no spatial extent has been provided.

        The GeoJSON can be one of the following feature types:

        * A ``Polygon`` or ``MultiPolygon`` geometry,
        * a ``Feature`` with a ``Polygon`` or ``MultiPolygon`` geometry, or
        * a ``FeatureCollection`` containing at least one ``Feature`` with ``Polygon`` or ``MultiPolygon`` geometries.

        Set this parameter to ``None`` to set no limit for the spatial extent.
        Be careful with this when loading large datasets. It is recommended to use this parameter instead of
        using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data.

    :param temporal_extent:
        Limits the data to load to the specified left-closed temporal interval.
        Applies to all temporal dimensions.
        The interval has to be specified as an array with exactly two elements:

        1. The first element is the start of the temporal interval.
           The specified instance in time is **included** in the interval.
        2. The second element is the end of the temporal interval.
           The specified instance in time is **excluded** from the interval.

        The second element must always be greater/later than the first element.
        Otherwise, a `TemporalExtentEmpty` exception is thrown.

        Also supports open intervals by setting one of the boundaries to ``None``, but never both.

        Set this parameter to ``None`` to set no limit for the temporal extent.
        Be careful with this when loading large datasets. It is recommended to use this parameter instead of
        using ``filter_temporal()`` directly after loading unbounded data.

    :param bands:
        Only adds the specified bands into the data cube so that bands that don't match the list
        of band names are not available. Applies to all dimensions of type `bands`.

        Either the unique band name (metadata field ``name`` in bands) or one of the common band names
        (metadata field ``common_name`` in bands) can be specified.
        If the unique band name and the common name conflict, the unique band name has a higher priority.

        The order of the specified array defines the order of the bands in the data cube.
        If multiple bands match a common name, all matched bands are included in the original order.

        It is recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded data.

    :param properties:
        Limits the data by metadata properties to include only data in the data cube which
        all given conditions return ``True`` for (AND operation).

        Specify key-value-pairs with the key being the name of the metadata property,
        which can be retrieved with the openEO Data Discovery for Collections.
        The value must be a condition (user-defined process) to be evaluated against a STAC API.
        This parameter is not supported for static STAC.

    :return: the :py:class:`DataCube` built by ``DataCube.load_stac``

    .. versionadded:: 0.17.0

    .. versionchanged:: 0.23.0
        Argument ``temporal_extent``: add support for year/month shorthand notation
        as discussed at :ref:`date-shorthand-handling`.
    """
    cube = DataCube.load_stac(
        connection=self,
        url=url,
        spatial_extent=spatial_extent,
        temporal_extent=temporal_extent,
        bands=bands,
        properties=properties,
    )
    return cube
+ + +
def load_stac_from_job(
    self,
    job: Union[BatchJob, str],
    spatial_extent: Union[Dict[str, float], Parameter, None] = None,
    temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
    bands: Optional[List[str]] = None,
    properties: Optional[Dict[str, Union[str, PGNode, Callable]]] = None,
) -> DataCube:
    """
    Convenience function to directly load the results of a finished openEO job
    (as a STAC collection) with :py:meth:`load_stac` in a new openEO process graph.

    When available, the "canonical" link (signed URL) of the job results will be used.
    Without a canonical link, or when fetching the results metadata fails with an
    ``OpenEoApiError``, the job results URL is used instead (with a logged warning).

    :param job: a :py:class:`~openeo.rest.job.BatchJob` or job id pointing to a finished job.
        Note that the :py:class:`~openeo.rest.job.BatchJob` approach allows to point
        to a batch job on a different back-end.
    :param spatial_extent: limit data to specified bounding box or polygons
    :param temporal_extent: limit data to specified temporal interval.
    :param bands: limit data to the specified bands
    :param properties: passed on unchanged to :py:meth:`load_stac`
    :raises ValueError: when ``job`` is neither a ``BatchJob`` nor a string

    .. versionadded:: 0.30.0
    """
    # TODO #634 add option to require or avoid the canonical link
    if isinstance(job, str):
        batch_job = BatchJob(job_id=job, connection=self)
    elif isinstance(job, BatchJob):
        batch_job = job
    else:
        raise ValueError("job must be a BatchJob or job id")

    try:
        results_metadata = batch_job.get_results().get_metadata()
        signed_urls = []
        for link in results_metadata.get("links", []):
            if link.get("rel") == "canonical" and "href" in link:
                signed_urls.append(link["href"])

        if signed_urls:
            if len(signed_urls) > 1:
                _log.warning(
                    f"Multiple canonical links found in job results metadata: {signed_urls}. Picking first one."
                )
            stac_link = signed_urls[0]
        else:
            _log.warning("No canonical link found in job results metadata. Using job results URL instead.")
            stac_link = batch_job.get_results_metadata_url(full=True)
    except OpenEoApiError as exc:
        _log.warning(f"Failed to get the canonical job results: {exc!r}. Using job results URL instead.")
        stac_link = batch_job.get_results_metadata_url(full=True)

    return self.load_stac(
        url=stac_link,
        spatial_extent=spatial_extent,
        temporal_extent=temporal_extent,
        bands=bands,
        properties=properties,
    )
+ + +
def load_ml_model(self, id: Union[str, BatchJob]) -> MlModel:
    """
    Loads a machine learning model from a STAC Item.

    Thin wrapper around ``MlModel.load_ml_model`` with this connection.

    :param id: STAC item reference, as URL, batch job (id) or user-uploaded file
    :return: the ``MlModel`` produced by ``MlModel.load_ml_model``

    .. versionadded:: 0.10.0
    """
    model = MlModel.load_ml_model(id=id, connection=self)
    return model
+ + +
@openeo_process
def load_geojson(
    self,
    data: Union[dict, str, Path, shapely.geometry.base.BaseGeometry, Parameter],
    properties: Optional[List[str]] = None,
):
    """
    Converts GeoJSON data as defined by RFC 7946 into a vector data cube.

    Thin wrapper around ``VectorCube.load_geojson`` with this connection.

    :param data: the geometry to load. One of:

        - GeoJSON-style data structure: e.g. a dictionary with ``"type": "Polygon"`` and ``"coordinates"`` fields
        - a path to a local GeoJSON file
        - a GeoJSON string
        - a shapely geometry object

    :param properties: A list of properties from the GeoJSON file to construct an additional dimension from.
    :return: new VectorCube instance

    .. warning:: EXPERIMENTAL: this process is experimental with the potential for major things to change.

    .. versionadded:: 0.22.0
    """
    cube = VectorCube.load_geojson(data=data, properties=properties, connection=self)
    return cube
+ + +
@openeo_process
def load_url(self, url: str, format: str, options: Optional[dict] = None):
    """
    Loads a file from a URL

    A format that is not listed by :py:meth:`list_input_formats` only triggers
    a logged warning: the load is still attempted.

    :param url: The URL to read from. Authentication details such as API keys or tokens may need to be included in the URL.
    :param format: The file format to use when loading the data.
    :param options: The file format parameters to use when reading the data.
        Must correspond to the parameters that the server reports as supported parameters for the chosen ``format``
    :return: new VectorCube instance

    .. warning:: EXPERIMENTAL: this process is experimental with the potential for major things to change.

    .. versionadded:: 0.22.0
    """
    known_formats = self.list_input_formats()
    if format not in known_formats:
        # TODO: make this an error?
        _log.warning(f"Format {format!r} not listed in back-end input formats")
    # TODO: Inspect format's gis_data_type to see if we need to load a VectorCube or classic raster DataCube
    cube = VectorCube.load_url(connection=self, url=url, format=format, options=options)
    return cube
def create_service(self, graph: dict, type: str, **kwargs) -> Service:
    """
    Create a secondary web service from a process graph (``POST /services``).

    :param graph: process graph to expose as a service
    :param type: service type, sent as the top-level ``"type"`` property of the request
    :param kwargs: additional top-level properties of the request body
    :return: a ``Service`` built from the ``OpenEO-Identifier`` response header and this connection
    """
    # TODO: type hint for graph: is it a nested or a flat one?
    request = self._build_request_with_process_graph(process_graph=graph, type=type, **kwargs)
    self._preflight_validation(pg_with_metadata=request)
    response = self.post(path="/services", json=request, expected_status=201)
    return Service(response.headers.get("OpenEO-Identifier"), self)
@deprecated("Use :py:meth:`openeo.rest.service.Service.delete_service` instead.", version="0.8.0")
def remove_service(self, service_id: str):
    """
    Stop and remove a secondary web service.

    Delegates to ``Service.delete_service`` for the given id on this connection.

    :param service_id: service identifier
    :return: None
    """
    service = Service(service_id, self)
    service.delete_service()
+ + +
@deprecated("Use :py:meth:`openeo.rest.job.BatchJob.get_results` instead.", version="0.4.10")
def job_results(self, job_id) -> dict:
    """
    Get batch job results metadata.

    :param job_id: id of the batch job
    :return: the result of ``BatchJob.list_results()`` for that job
    """
    batch_job = BatchJob(job_id=job_id, connection=self)
    return batch_job.list_results()
+ + +
@deprecated("Use :py:meth:`openeo.rest.job.BatchJob.logs` instead.", version="0.4.10")
def job_logs(self, job_id, offset) -> list:
    """
    Get batch job logs.

    :param job_id: id of the batch job
    :param offset: passed on as ``offset`` to ``BatchJob.logs``
    :return: the result of ``BatchJob.logs(offset=offset)`` for that job
    """
    batch_job = BatchJob(job_id=job_id, connection=self)
    return batch_job.logs(offset=offset)
+ + +
def list_files(self) -> List[UserFile]:
    """
    Lists all user-uploaded files in the user workspace on the back-end.

    :return: the user-uploaded files as ``UserFile`` objects, wrapped in a ``VisualList``.
    """
    listing = self.get('/files', expected_status=200).json()['files']
    user_files = []
    for file_metadata in listing:
        user_files.append(UserFile.from_metadata(metadata=file_metadata, connection=self))
    return VisualList("data-table", data=user_files, parameters={'columns': 'files'})
+ + +
def get_file(
    self, path: Union[str, PurePosixPath], metadata: Optional[dict] = None
) -> UserFile:
    """
    Gets a handle to a user-uploaded file in the user workspace on the back-end.

    :param path: The path on the user workspace.
    :param metadata: optional file metadata, passed on to the ``UserFile`` constructor.
    :return: a ``UserFile`` bound to this connection
    """
    handle = UserFile(path=path, metadata=metadata, connection=self)
    return handle
+ + +
def upload_file(
    self,
    source: Union[Path, str],
    target: Optional[Union[str, PurePosixPath]] = None,
) -> UserFile:
    """
    Uploads a file to the given target location in the user workspace on the back-end.

    If a file at the target path exists in the user workspace it will be replaced.

    :param source: A path to a file on the local file system to upload.
    :param target: The desired path (which can contain a folder structure if desired) on the user workspace.
        If not set: defaults to the original filename (without any folder structure) of the local file .
    :return: a ``UserFile`` built from the metadata in the response
    """
    local_path = Path(source)
    if not target:
        # No (or empty) target given: use the bare file name of the local file.
        target = local_path.name
    # TODO: support other non-path sources too: bytes, open file, url, ...
    with local_path.open("rb") as stream:
        response = self.put(f"/files/{target!s}", expected_status=200, data=stream)
        file_metadata = response.json()
    return UserFile.from_metadata(metadata=file_metadata, connection=self)
def _build_request_with_process_graph(
    self,
    process_graph: Union[dict, FlatGraphableMixin, str, Path, List[FlatGraphableMixin]],
    additional: Optional[dict] = None,
    job_options: Optional[dict] = None,
    **kwargs,
) -> dict:
    """
    Prepare a json payload with a process graph to submit to /result, /services, /jobs, ...

    :param process_graph: process graph, in any form accepted by ``as_flat_graph``
    :param additional: additional top-level properties to merge into the payload
    :param job_options: when not None: stored under the top-level ``"job_options"`` property
    :param kwargs: further top-level properties of the payload
    :return: flat dict representing a "process graph with metadata"
        (``{"process": {"process_graph": ...}, ...}``)
    :raises OpenEoClientException: when the process graph refers to a connection other than this one
    """
    # TODO: make this a more general helper (like `as_flat_graph`)
    linked = extract_connections(process_graph)
    for conn in linked:
        if conn != self:
            raise OpenEoClientException(f"Mixing different connections: {self} and {linked}.")

    # The keyword arguments are the starting point of the payload.
    payload = kwargs
    if additional:
        payload.update(additional)
    if job_options is not None:
        # Note: this "job_options" top-level property is not in official openEO API spec,
        # but a commonly used convention, e.g. in openeo-python-driver based deployments.
        assert "job_options" not in payload
        payload["job_options"] = job_options

    flat = as_flat_graph(process_graph)
    # Wrap a bare flat graph so that "process" always has a "process_graph" field.
    # TODO: also check if `process_graph` already has "process" key (i.e. is a "process graph with metadata" already)
    payload["process"] = flat if "process_graph" in flat else {"process_graph": flat}
    return payload


def _preflight_validation(self, pg_with_metadata: dict, *, validate: Optional[bool] = None):
    """
    Preflight validation of process graph to execute.

    Validation problems (and failures of the validation request itself) are only
    logged: nothing is raised from the validation attempt.

    :param pg_with_metadata: flat dict representation of process graph with metadata,
        e.g. as produced by `_build_request_with_process_graph`
    :param validate: Optional toggle to enable/prevent validation of the process graphs before execution
        (overruling the connection's ``auto_validate`` setting).

    :return: None
    """
    if validate is None:
        validate = self._auto_validate
    if not (validate and self.capabilities().supports_endpoint("/validation", "POST")):
        return

    # At present, the intention is that a failed validation does not block
    # the job from running, it is only reported as a warning.
    # Therefor we also want to continue when something *else* goes wrong
    # *during* the validation.
    try:
        response = self.post(path="/validation", json=pg_with_metadata["process"], expected_status=200)
        problems = response.json()["errors"]
        if problems:
            summary = " ".join(f"[{p.get('code')}] {p.get('message')}" for p in problems)
            _log.warning("Preflight process graph validation raised: " + summary)
    except Exception as exc:
        _log.error(f"Preflight process graph validation failed: {exc}")

    # TODO: additional validation and sanity checks: e.g. is there a result node, are all process_ids valid, ...?


# TODO: unify `download` and `execute` better: e.g. `download` always writes to disk, `execute` returns result (raw or as JSON decoded dict)
def download(
    self,
    graph: Union[dict, FlatGraphableMixin, str, Path, List[FlatGraphableMixin]],
    outputfile: Union[Path, str, None] = None,
    *,
    timeout: Optional[int] = None,
    validate: Optional[bool] = None,
    chunk_size: int = DEFAULT_DOWNLOAD_CHUNK_SIZE,
    additional: Optional[dict] = None,
    job_options: Optional[dict] = None,
) -> Union[None, bytes]:
    """
    Downloads the result of a process graph synchronously,
    and save the result to the given file or return bytes object if no outputfile is specified.
    This method is useful to export binary content such as images. For json content, the execute method is recommended.

    :param graph: (flat) dict representing a process graph, or process graph as raw JSON string,
        or as local file path or URL
    :param outputfile: output file
    :param timeout: timeout to wait for response
    :param validate: Optional toggle to enable/prevent validation of the process graphs before execution
        (overruling the connection's ``auto_validate`` setting).
    :param chunk_size: chunk size for streaming response.
    :param additional: additional (top-level) properties to set in the request body
    :param job_options: dictionary of job options to pass to the backend
        (under top-level property "job_options")
    :return: the response content as bytes when no ``outputfile`` is given, otherwise None

    .. versionadded:: 0.36.0
        Added arguments ``additional`` and ``job_options``.
    """
    request = self._build_request_with_process_graph(
        process_graph=graph, additional=additional, job_options=job_options
    )
    self._preflight_validation(pg_with_metadata=request, validate=validate)
    response = self.post(
        path="/result",
        json=request,
        expected_status=200,
        stream=True,
        timeout=timeout or DEFAULT_TIMEOUT_SYNCHRONOUS_EXECUTE,
    )

    if outputfile is None:
        return response.content

    # Stream the response to disk chunk by chunk.
    with Path(outputfile).open(mode="wb") as target:
        for chunk in response.iter_content(chunk_size=chunk_size):
            target.write(chunk)
    return None
+ + +
def execute(
    self,
    process_graph: Union[dict, FlatGraphableMixin, str, Path, List[FlatGraphableMixin]],
    *,
    timeout: Optional[int] = None,
    validate: Optional[bool] = None,
    auto_decode: bool = True,
    additional: Optional[dict] = None,
    job_options: Optional[dict] = None,
) -> Union[dict, requests.Response]:
    """
    Execute a process graph synchronously and return the result. If the result is a JSON object, it will be parsed.

    :param process_graph: (flat) dict representing a process graph, or process graph as raw JSON string,
        or as local file path or URL
    :param timeout: timeout to wait for response
    :param validate: Optional toggle to enable/prevent validation of the process graphs before execution
        (overruling the connection's ``auto_validate`` setting).
    :param auto_decode: Boolean flag to enable/disable automatic JSON decoding of the response. Defaults to True.
    :param additional: additional (top-level) properties to set in the request body
    :param job_options: dictionary of job options to pass to the backend
        (under top-level property "job_options")

    :return: parsed JSON response as a dict if auto_decode is True, otherwise response object
    :raises OpenEoClientException: when ``auto_decode`` is enabled and the response is not valid JSON

    .. versionadded:: 0.36.0
        Added arguments ``additional`` and ``job_options``.
    """
    request = self._build_request_with_process_graph(
        process_graph=process_graph, additional=additional, job_options=job_options
    )
    self._preflight_validation(pg_with_metadata=request, validate=validate)
    response = self.post(
        path="/result",
        json=request,
        expected_status=200,
        timeout=timeout or DEFAULT_TIMEOUT_SYNCHRONOUS_EXECUTE,
    )
    if not auto_decode:
        return response
    try:
        return response.json()
    except requests.exceptions.JSONDecodeError as exc:
        raise OpenEoClientException(
            "Failed to decode response as JSON. For other data types use `download` method instead of `execute`."
        ) from exc
+ + +
def create_job(
    self,
    process_graph: Union[dict, FlatGraphableMixin, str, Path, List[FlatGraphableMixin]],
    *,
    title: Optional[str] = None,
    description: Optional[str] = None,
    plan: Optional[str] = None,
    budget: Optional[float] = None,
    additional: Optional[dict] = None,
    job_options: Optional[dict] = None,
    validate: Optional[bool] = None,
) -> BatchJob:
    """
    Register a new batch job on the back-end for the given process graph.

    :param process_graph: openEO-style (flat) process graph representation,
        or an object that can be converted to such a representation:
        a dictionary, a :py:class:`~openeo.rest.datacube.DataCube` object,
        a string with a JSON representation,
        a local file path or URL to a JSON representation,
        a :py:class:`~openeo.rest.multiresult.MultiResult` object, ...
    :param title: job title
    :param description: job description
    :param plan: The billing plan to process and charge the job with
    :param budget: Maximum budget to be spent on executing the job.
        Note that some backends do not honor this limit.
    :param additional: additional (top-level) properties to set in the request body
    :param job_options: dictionary of job options to pass to the backend
        (under top-level property "job_options")
    :param validate: Optional toggle to enable/prevent validation of the process graphs before execution
        (overruling the connection's ``auto_validate`` setting).
    :return: Created job
    :raises OpenEoClientException: if no job id could be found in the response headers

    .. versionchanged:: 0.35.0
        Add :ref:`multi-result support <multi-result-process-graphs>`.

    .. versionadded:: 0.36.0
        Added argument ``job_options``.
    """
    # TODO move all this (BatchJob factory) logic to BatchJob?

    # Only job metadata fields that were actually given end up in the request body.
    job_metadata = dict_no_none(title=title, description=description, plan=plan, budget=budget)
    request_body = self._build_request_with_process_graph(
        process_graph=process_graph,
        additional=additional,
        job_options=job_options,
        **job_metadata,
    )

    self._preflight_validation(pg_with_metadata=request_body, validate=validate)
    response = self.post("/jobs", json=request_body, expected_status=201)

    # Prefer the explicit job id header, fall back on the last segment of the redirect URL.
    headers = response.headers
    job_id = None
    if "openeo-identifier" in headers:
        job_id = headers["openeo-identifier"].strip()
    elif "location" in headers:
        _log.warning("Backend did not explicitly respond with job id, will guess it from redirect URL.")
        job_id = headers["location"].split("/")[-1]

    if not job_id:
        raise OpenEoClientException("Job creation response did not contain a valid job id")
    return BatchJob(job_id=job_id, connection=self)
+ + +
def job(self, job_id: str) -> BatchJob:
    """
    Build a :py:class:`BatchJob` handle for a given job id.
    The job with that id is expected to exist already: nothing is created here.

    Use :py:meth:`openeo.rest.connection.Connection.create_job` to create new jobs

    :param job_id: the job id of an existing job
    :return: A job object.
    """
    batch_job = BatchJob(connection=self, job_id=job_id)
    return batch_job
+ + +
def service(self, service_id: str) -> Service:
    """
    Build a :py:class:`Service` handle for a given secondary web service id.
    The service with that id is expected to exist already: nothing is created here.

    Use :py:meth:`openeo.rest.connection.Connection.create_service` to create new services

    :param service_id: the service id of an existing secondary web service
    :return: A service object.
    """
    web_service = Service(service_id, connection=self)
    return web_service
+ + +
@deprecated(
    reason="Depends on non-standard process, replace with :py:meth:`openeo.rest.connection.Connection.load_stac` where possible.",
    version="0.25.0",
)
def load_disk_collection(self, format: str, glob_pattern: str, options: Optional[dict] = None) -> DataCube:
    """
    Load image data from disk as a :py:class:`DataCube`.

    This relies on a non-standard process ('load_disk_data'), which will eventually be replaced
    by standard options such as :py:meth:`openeo.rest.connection.Connection.load_stac`
    or https://processes.openeo.org/#load_uploaded_files

    :param format: the file format, e.g. 'GTiff'
    :param glob_pattern: a glob pattern that matches the files to load from disk
    :param options: options specific to the file format
    """
    # The options are passed on as keyword arguments, so normalize "no options" to an empty dict.
    format_options = options or {}
    return DataCube.load_disk_collection(self, format, glob_pattern, **format_options)
+ + +
def as_curl(
    self,
    data: Union[dict, DataCube, FlatGraphableMixin],
    *,
    path="/result",
    method="POST",
    obfuscate_auth: bool = False,
    additional: Optional[dict] = None,
    job_options: Optional[dict] = None,
) -> str:
    """
    Compose a (shell-quoted) curl command line that sends the given process graph or data cube
    to the back-end, including content-type and (when bearer auth is set) authorization headers.

        >>> print(connection.as_curl(cube))
        curl -i -X POST -H 'Content-Type: application/json' -H 'Authorization: Bearer ...' \\
            --data '{"process":{"process_graph":{...}}' \\
            https://openeo.example/openeo/1.1/result

    :param data: something that is convertable to an openEO process graph: a dictionary,
        a :py:class:`~openeo.rest.datacube.DataCube` object,
        a :py:class:`~openeo.processes.ProcessBuilder`, ...
    :param path: endpoint to send request to: typically ``"/result"`` (default) for synchronous requests
        or ``"/jobs"`` for batch jobs
    :param method: HTTP method to use (typically ``"POST"``)
    :param obfuscate_auth: don't show actual bearer token
    :param additional: additional (top-level) properties to set in the request body
    :param job_options: dictionary of job options to pass to the backend
        (under top-level property "job_options")

    :return: curl command as a string

    .. versionadded:: 0.36.0
        Added arguments ``additional`` and ``job_options``.
    """
    curl_args = ["curl", "-i", "-X", method, "-H", "Content-Type: application/json"]
    if isinstance(self.auth, BearerAuth):
        token = "..." if obfuscate_auth else self.auth.bearer
        curl_args.extend(["-H", f"Authorization: Bearer {token}"])

    request_body = self._build_request_with_process_graph(data, additional=additional, job_options=job_options)
    if path == "/validation":
        # For this endpoint only the "process" part of the request body is sent.
        request_body = request_body["process"]
    # Compact JSON encoding (no whitespace around separators).
    encoded_body = json.dumps(request_body, separators=(",", ":"))
    curl_args.extend(["--data", encoded_body])
    curl_args.append(self.build_url(path))
    return " ".join(map(shlex.quote, curl_args))
+ + +
def version_info(self):
    """
    List version of the openEO client, API, back-end, etc.

    :return: dictionary with entries "client", "api" and "backend"
        (the latter being a dictionary itself, without ``None`` values).
    """
    caps = self.capabilities()
    info = {
        "client": openeo.client_version(),
        "api": caps.api_version(),
    }
    backend_details = {
        "root_url": self.root_url,
        "version": caps.get("backend_version"),
        "processing:software": caps.get("processing:software"),
    }
    info["backend"] = dict_no_none(backend_details)
    return info
+
+ + + +
def connect(
    url: Optional[str] = None,
    *,
    auth_type: Optional[str] = None,
    auth_options: Optional[dict] = None,
    session: Optional[requests.Session] = None,
    default_timeout: Optional[int] = None,
    auto_validate: bool = True,
) -> Connection:
    """
    Main entry point to openEO: set up a connection to a back-end.
    You typically create one connection object in your script or application
    and re-use it for all calls to that backend.

    If the backend requires authentication, you can pass authentication data directly to this function,
    but it could be easier to authenticate as follows:

        >>> # For basic authentication
        >>> conn = connect(url).authenticate_basic(username="john", password="foo")
        >>> # For OpenID Connect authentication
        >>> conn = connect(url).authenticate_oidc(client_id="myclient")

    :param url: The http url of the OpenEO back-end.
        When not given, the ``connection.default_backend`` config option is consulted.
    :param auth_type: Which authentication to use: None, "basic" or "oidc" (for OpenID Connect)
    :param auth_options: Options/arguments specific to the authentication type
    :param session: optional session object, passed on to the :py:class:`Connection` constructor
    :param default_timeout: default timeout (in seconds) for requests
    :param auto_validate: toggle to automatically validate process graphs before execution
    :return: the (optionally authenticated) connection
    :raises OpenEoClientException: if no back-end URL is given nor configured
    :raises ValueError: on an unknown authentication type

    .. versionadded:: 0.24.0
        added ``auto_validate`` argument
    """

    def _config_log(message):
        # Report both through regular logging and the config logging channel.
        _log.info(message)
        config_log(message)

    def _auth_type_from_config(option: str) -> Optional[str]:
        # Look up an auto-authentication option, only accepting supported values.
        value = get_config_option(option)
        if value and value.lower() in {"basic", "oidc"}:
            return value.lower()
        return None

    if url is None:
        default_backend = get_config_option("connection.default_backend")
        if default_backend:
            url = default_backend
            _config_log(f"Using default back-end URL {url!r} (from config)")
            configured_auth = _auth_type_from_config("connection.default_backend.auto_authenticate")
            if configured_auth:
                auth_type = configured_auth
                _config_log(f"Doing auto-authentication {auth_type!r} (from config)")

    if auth_type is None:
        configured_auth = _auth_type_from_config("connection.auto_authenticate")
        if configured_auth:
            auth_type = configured_auth
            _config_log(f"Doing auto-authentication {auth_type!r} (from config)")

    if not url:
        raise OpenEoClientException("No openEO back-end URL given or known to connect to.")
    connection = Connection(url, session=session, default_timeout=default_timeout, auto_validate=auto_validate)

    # Case-insensitive handling of string based auth types.
    if isinstance(auth_type, str):
        auth_type = auth_type.lower()

    if auth_type in {None, False, 'null', 'none'}:
        # No authentication requested.
        return connection

    if auth_type == "basic":
        connection.authenticate_basic(**(auth_options or {}))
    elif auth_type in {"oidc", "openid"}:
        connection.authenticate_oidc(**(auth_options or {}))
    else:
        raise ValueError("Unknown auth type {a!r}".format(a=auth_type))
    return connection
@deprecated("Use :py:func:`openeo.connect` instead", version="0.0.9")
def session(userid=None, endpoint: str = "https://openeo.org/openeo") -> Connection:
    """
    Legacy entry point to openEO: connect to the back-end at the given endpoint.
    You typically create one such object in your script or application, per back-end,
    and re-use it for all calls to that backend.

    :param userid: not used by this function
    :param endpoint: The http url of an OpenEO endpoint.
    :return: connection to the endpoint, as created by :py:func:`connect`
    """
    connection = connect(url=endpoint)
    return connection


def paginate(con: Connection, url: str, params: Optional[dict] = None, callback: Callable = lambda resp, page: resp):
    """
    Generator that walks through a paginated resource by following ``"rel": "next"`` links.

    :param con: connection to do the GET requests with
    :param url: URL of the first page
    :param params: query parameters for the first request only
        (follow-up requests use the "next" link as is, with empty parameters)
    :param callback: called with the parsed JSON response and the (1-based) page number
        to produce the value to yield for each page
    """
    # TODO: make this a method `get_paginated` on `RestApiConnection`?
    # TODO: is it necessary to have `callback`? It's only used just before yielding,
    #       so it's probably cleaner (even for the caller) to to move it outside.
    page_number = 1
    while True:
        payload = con.get(url, params=params).json()
        yield callback(payload, page_number)
        next_links = [
            link for link in payload.get("links", []) if link.get("rel") == "next" and "href" in link
        ]
        if not next_links:
            # No further pages.
            break
        url = next_links[0]["href"]
        page_number += 1
        params = {}


def extract_connections(
    data: Union[_ProcessGraphAbstraction, Sequence[_ProcessGraphAbstraction], Any]
) -> Set[Connection]:
    """
    Collect the :py:class:`Connection` object(s) that a given data construct is linked to.
    Typical use case is to get the connection from a :py:class:`DataCube`,
    but can also extract multiple connections from a list of data cubes.

    :param data: a single process graph abstraction object, or a list/tuple/set of these.
        Anything else results in an empty set.
    :return: set of connections (items without a connection are ignored)
    """
    # TODO: define some kind of "Connected" interface/mixin/protocol
    #       for objects that contain a connection instead of just checking for _ProcessGraphAbstraction
    # TODO: also support extracting connections from other objects like BatchJob, ...
    if isinstance(data, _ProcessGraphAbstraction):
        candidates = [data]
    elif isinstance(data, (list, tuple, set)):
        candidates = data
    else:
        candidates = []

    return {
        item.connection
        for item in candidates
        if isinstance(item, _ProcessGraphAbstraction) and item.connection
    }
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/rest/conversions.html b/_modules/openeo/rest/conversions.html new file mode 100644 index 000000000..62ac47778 --- /dev/null +++ b/_modules/openeo/rest/conversions.html @@ -0,0 +1,263 @@ + + + + + + + openeo.rest.conversions — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.rest.conversions

+"""
+Helpers for data conversions between Python ecosystem data types and openEO data structures.
+"""
+
+from __future__ import annotations
+
+import typing
+
+import numpy as np
+import pandas
+
+from openeo.internal.warnings import deprecated
+
+if typing.TYPE_CHECKING:
+    # Imports for type checking only (circular import issue at runtime).
+    import xarray
+
+    from openeo.udf import XarrayDataCube
+
+
+
class InvalidTimeSeriesException(ValueError):
    """Raised by :py:func:`timeseries_json_to_pandas` when the given timeseries data is empty or inconsistent."""
+ + + +
def timeseries_json_to_pandas(timeseries: dict, index: str = "date", auto_collapse=True) -> pandas.DataFrame:
    """
    Turn a timeseries JSON object, as returned by the `aggregate_spatial` process, into a pandas DataFrame object

    In general, this timeseries data has three dimensions: date, polygon index and band index.
    One of them becomes the index of the resulting dataframe (chosen with the `index` argument),
    the other two end up as multilevel columns.
    If `auto_collapse` is enabled (the default), a dimension with just a single polygon or a single band
    is dropped to simplify the result.

    :param timeseries: dictionary as returned by `aggregate_spatial`
    :param index: which dimension should be used for the DataFrame index: 'date' or 'polygon'
    :param auto_collapse: whether single band or single polygon cases should be simplified automatically

    :return: pandas DataFrame or Series
    :raises InvalidTimeSeriesException: on empty data or inconsistent polygon/band counts
    :raises ValueError: on an unsupported `index` value
    """
    # The input timeseries dictionary is assumed to have this structure:
    #       {dict mapping date -> [list with one item per polygon: [list with one float/None per band or empty list]]}
    # TODO is this format of `aggregate_spatial` standardized across backends? Or can we detect the structure?
    # TODO: option to pass a path to a JSON file as input?

    # Some quick checks
    if len(timeseries) == 0:
        raise InvalidTimeSeriesException("Empty data set")
    polygon_counts = {len(polygons) for polygons in timeseries.values()}
    if polygon_counts == {0}:
        raise InvalidTimeSeriesException("No polygon data for each date")
    if 0 in polygon_counts:
        # TODO: still support this use case?
        raise InvalidTimeSeriesException("No polygon data for some dates ({p})".format(p=polygon_counts))
    if len(polygon_counts) > 1:
        raise InvalidTimeSeriesException("Inconsistent polygon counts: {p}".format(p=polygon_counts))

    # Determine the (single) band count, so a NaN filled stand-in can be used where band data is missing.
    band_counts = {len(bands) for polygons in timeseries.values() for bands in polygons}
    if band_counts == {0}:
        raise InvalidTimeSeriesException("Zero bands everywhere")
    band_counts.discard(0)
    if len(band_counts) != 1:
        raise InvalidTimeSeriesException("Inconsistent band counts: {b}".format(b=band_counts))
    missing_bands_stand_in = [np.nan] * band_counts.pop()

    # Flatten everything to (date, polygon, band, value) records
    # and load these in a pandas Series with multi-index ["date", "polygon", "band"]
    records = (
        (date, polygon_index, band_index, value)
        for (date, polygons) in timeseries.items()
        for polygon_index, bands in enumerate(polygons)
        for band_index, value in enumerate(bands or missing_bands_stand_in)
    )
    frame = pandas.DataFrame.from_records(
        records,
        columns=["date", "polygon", "band", "value"],
        index=["date", "polygon", "band"],
    )
    series = frame["value"].rename(None)
    # TODO convert date to real date index?

    if auto_collapse:
        # First the single band case, then the single polygon case:
        # drop index levels that only have a single value.
        for position, level in ((2, "band"), (1, "polygon")):
            if series.index.levshape[position] == 1:
                series.index = series.index.droplevel(level)

    # Reshape as desired
    if index == "date":
        if len(series.index.names) > 1:
            return series.unstack("date").T
        return series
    if index == "polygon":
        return series.unstack("polygon").T
    raise ValueError(index)
+ + + +
@deprecated("Use :py:meth:`XarrayDataCube.from_file` instead.", version="0.7.0")
def datacube_from_file(filename, fmt="netcdf") -> XarrayDataCube:
    """
    Load a data cube from file, by delegating to ``XarrayDataCube.from_file``.

    :param filename: path of the file to load
    :param fmt: file format identifier (``"netcdf"`` by default)
    """
    # Import at call time (the module level import is for type checking only).
    from openeo.udf.xarraydatacube import XarrayDataCube

    cube = XarrayDataCube.from_file(path=filename, fmt=fmt)
    return cube
+ + + +
@deprecated("Use :py:meth:`XarrayDataCube.save_to_file` instead.", version="0.7.0")
def datacube_to_file(datacube: XarrayDataCube, filename, fmt="netcdf"):
    """
    Store a data cube to file, by delegating to its ``save_to_file`` method.

    :param datacube: data cube to save
    :param filename: path of the file to write
    :param fmt: file format identifier (``"netcdf"`` by default)
    :return: whatever ``datacube.save_to_file`` returns
    """
    result = datacube.save_to_file(path=filename, fmt=fmt)
    return result
@deprecated("Use :py:meth:`XarrayIO.to_json_file` instead", version="0.7.0")
def _save_DataArray_to_JSON(filename, array: xarray.DataArray):
    """
    Write the given array to a JSON file, by delegating to ``XarrayIO.to_json_file``.

    :param filename: path of the file to write
    :param array: array to save
    :return: whatever ``XarrayIO.to_json_file`` returns
    """
    # Import at call time, like the other helpers in this module.
    from openeo.udf.xarraydatacube import XarrayIO

    result = XarrayIO.to_json_file(array=array, path=filename)
    return result


@deprecated("Use :py:meth:`XarrayIO.to_netcdf_file` instead", version="0.7.0")
def _save_DataArray_to_NetCDF(filename, array: xarray.DataArray):
    """
    Write the given array to a NetCDF file, by delegating to ``XarrayIO.to_netcdf_file``.

    :param filename: path of the file to write
    :param array: array to save
    :return: whatever ``XarrayIO.to_netcdf_file`` returns
    """
    # Import at call time, like the other helpers in this module.
    from openeo.udf.xarraydatacube import XarrayIO

    result = XarrayIO.to_netcdf_file(array=array, path=filename)
    return result
@deprecated("Use :py:meth:`XarrayDataCube.plot` instead.", version="0.7.0")
def datacube_plot(datacube: XarrayDataCube, *args, **kwargs):
    """
    Plot the given data cube, by delegating to its ``plot`` method.

    :param datacube: data cube to plot
    :param args: positional arguments, passed on to ``datacube.plot``
    :param kwargs: keyword arguments, passed on to ``datacube.plot``
    :return: nothing (the result of ``datacube.plot`` is discarded)
    """
    datacube.plot(*args, **kwargs)
    return None
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/rest/datacube.html b/_modules/openeo/rest/datacube.html new file mode 100644 index 000000000..465e4d0bb --- /dev/null +++ b/_modules/openeo/rest/datacube.html @@ -0,0 +1,3260 @@ + + + + + + + openeo.rest.datacube — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.rest.datacube

+"""
+The main module for creating earth observation processes. It aims to easily build complex process chains, that can
+be evaluated by an openEO backend.
+
+.. data:: THIS
+
+    Symbolic reference to the current data cube, to be used as argument in :py:meth:`DataCube.process()` calls
+
+"""
+from __future__ import annotations
+
+import datetime
+import logging
+import pathlib
+import re
+import typing
+import urllib.parse
+import warnings
+from builtins import staticmethod
+from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union
+
+import numpy as np
+import requests
+import shapely.geometry
+import shapely.geometry.base
+from shapely.geometry import MultiPolygon, Polygon, mapping
+
+from openeo.api.process import Parameter, schema_supports
+from openeo.dates import get_temporal_extent
+from openeo.internal.documentation import openeo_process
+from openeo.internal.graph_building import PGNode, ReduceNode, _FromNodeMixin
+from openeo.internal.jupyter import in_jupyter_context
+from openeo.internal.processes.builder import (
+    ProcessBuilderBase,
+    convert_callable_to_pgnode,
+    get_parameter_names,
+)
+from openeo.internal.warnings import UserDeprecationWarning, deprecated, legacy_alias
+from openeo.metadata import (
+    Band,
+    BandDimension,
+    CollectionMetadata,
+    SpatialDimension,
+    TemporalDimension,
+    metadata_from_stac,
+)
+from openeo.processes import ProcessBuilder
+from openeo.rest import BandMathException, OpenEoClientException, OperatorException
+from openeo.rest._datacube import (
+    THIS,
+    UDF,
+    _ensure_save_result,
+    _ProcessGraphAbstraction,
+    build_child_callback,
+)
+from openeo.rest.graph_building import CollectionProperty
+from openeo.rest.job import BatchJob, RESTJob
+from openeo.rest.mlmodel import MlModel
+from openeo.rest.service import Service
+from openeo.rest.udp import RESTUserDefinedProcess
+from openeo.rest.vectorcube import VectorCube
+from openeo.util import dict_no_none, guess_format, load_json, normalize_crs, rfc3339
+
+if typing.TYPE_CHECKING:
+    # Imports for type checking only (circular import issue at runtime).
+    import xarray
+
+    from openeo.rest.connection import Connection
+    from openeo.udf import XarrayDataCube
+
+
+log = logging.getLogger(__name__)
+
+
+# Type annotation aliases
+InputDate = Union[str, datetime.date, Parameter, PGNode, ProcessBuilderBase, None]
+
+
+
+[docs] +class DataCube(_ProcessGraphAbstraction): + """ + Class representing a openEO (raster) data cube. + + The data cube is represented by its corresponding openeo "process graph" + and this process graph can be "grown" to a desired workflow by calling the appropriate methods. + """ + + # TODO: set this based on back-end or user preference? + _DEFAULT_RASTER_FORMAT = "GTiff" + +
def __init__(
    self,
    graph: PGNode,
    connection: Optional[Connection] = None,
    metadata: Optional[CollectionMetadata] = None,
):
    """
    :param graph: process graph node this data cube is built on
    :param connection: optional back-end connection to associate with the cube
    :param metadata: optional metadata of the cube
    """
    super().__init__(connection=connection, pgnode=graph)
    self.metadata: Optional[CollectionMetadata] = metadata
+ + +
def process(
    self,
    process_id: str,
    arguments: Optional[dict] = None,
    metadata: Optional[CollectionMetadata] = None,
    namespace: Optional[str] = None,
    **kwargs,
) -> DataCube:
    """
    Generic helper that applies a process to this cube and wraps the result in a new DataCube.

    :param process_id: process id of the process.
    :param arguments: argument dictionary for the process.
    :param metadata: optional: metadata to override original cube metadata (e.g. when reducing dimensions)
    :param namespace: optional: process namespace
    :param kwargs: additional keyword arguments, passed on to the process graph node builder
    :return: new DataCube instance
    """
    node = self._build_pgnode(process_id=process_id, arguments=arguments, namespace=namespace, **kwargs)
    # Without (truthy) metadata override, the new cube inherits the metadata of this cube.
    new_metadata = metadata or self.metadata
    return DataCube(graph=node, connection=self._connection, metadata=new_metadata)
+ + + graph_add_node = legacy_alias(process, "graph_add_node", since="0.1.1") + +
def process_with_node(self, pg: PGNode, metadata: Optional[CollectionMetadata] = None) -> DataCube:
    """
    Generic helper that wraps a given process graph node in a new DataCube
    that shares the connection of this cube.

    :param pg: process graph node (containing process id and arguments)
    :param metadata: optional: metadata to override original cube metadata (e.g. when reducing dimensions)
    :return: new DataCube instance
    """
    # TODO: deep copy `self.metadata` instead of using same instance?
    # TODO: cover more cases where metadata has to be altered
    # TODO: deprecate `process_with_node``: little added value over just calling DataCube() directly
    # Without (truthy) metadata override, the new cube inherits the metadata of this cube.
    new_metadata = metadata or self.metadata
    return DataCube(graph=pg, connection=self._connection, metadata=new_metadata)
def _do_metadata_normalization(self) -> bool:
    """
    Whether metadata-based normalization/validation (of dimension names, band names, ...) can be done,
    which requires the cube metadata to be a :py:class:`CollectionMetadata` instance.
    """
    has_collection_metadata = isinstance(self.metadata, CollectionMetadata)
    return has_collection_metadata

def _assert_valid_dimension_name(self, name: str) -> str:
    """
    Check the given dimension name against the cube metadata (when available)
    and return the name unchanged.
    """
    if not self._do_metadata_normalization():
        # No usable metadata: nothing to validate against.
        return name
    self.metadata.assert_valid_dimension(name)
    return name
@classmethod
@openeo_process
def load_collection(
    cls,
    collection_id: Union[str, Parameter],
    connection: Optional[Connection] = None,
    spatial_extent: Union[Dict[str, float], Parameter, None] = None,
    temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
    bands: Union[None, List[str], Parameter] = None,
    fetch_metadata: bool = True,
    properties: Union[
        None, Dict[str, Union[str, PGNode, typing.Callable]], List[CollectionProperty], CollectionProperty
    ] = None,
    max_cloud_cover: Optional[float] = None,
) -> DataCube:
    """
    Create a new Raster Data cube.

    :param collection_id: image collection identifier
    :param connection: The backend connection to use.
        Can be ``None`` to work without connection and collection metadata.
    :param spatial_extent: limit data to specified bounding box or polygons
    :param temporal_extent: limit data to specified temporal interval.
        Typically, just a two-item list or tuple containing start and end date.
        See :ref:`filtering-on-temporal-extent-section` for more details on temporal extent handling and shorthand notation.
    :param bands: only add the specified bands.
    :param fetch_metadata: whether to fetch the collection metadata through the connection.
        This is skipped anyway when there is no connection or when ``collection_id`` is a parameter.
    :param properties: limit data by metadata property predicates.
        See :py:func:`~openeo.rest.graph_building.collection_property` for easy construction of such predicates.
        A given dictionary is not modified.
    :param max_cloud_cover: shortcut to set maximum cloud cover ("eo:cloud_cover" collection property).
        Any value different from ``None`` (including ``0``) enables this filter.
    :return: new DataCube containing the collection

    .. versionchanged:: 0.13.0
        added the ``max_cloud_cover`` argument.

    .. versionchanged:: 0.23.0
        Argument ``temporal_extent``: add support for year/month shorthand notation
        as discussed at :ref:`date-shorthand-handling`.

    .. versionchanged:: 0.26.0
        Add :py:func:`~openeo.rest.graph_building.collection_property` support to ``properties`` argument.
    """
    if temporal_extent:
        temporal_extent = cls._get_temporal_extent(extent=temporal_extent)

    if isinstance(spatial_extent, Parameter):
        if not schema_supports(spatial_extent.schema, type="object"):
            warnings.warn(
                "Unexpected parameterized `spatial_extent` in `load_collection`:"
                f" expected schema compatible with type 'object' but got {spatial_extent.schema!r}."
            )
    arguments = {
        'id': collection_id,
        # TODO: spatial_extent could also be a "geojson" subtype object, so we might want to allow (and convert) shapely shapes as well here.
        'spatial_extent': spatial_extent,
        'temporal_extent': temporal_extent,
    }
    if isinstance(collection_id, Parameter):
        # No concrete collection id to look up metadata for.
        fetch_metadata = False
    metadata: Optional[CollectionMetadata] = (
        connection.collection_metadata(collection_id) if connection and fetch_metadata else None
    )
    if bands:
        if isinstance(bands, str):
            bands = [bands]
        elif isinstance(bands, Parameter):
            # Band selection is not known yet at this point, so the metadata can not be trusted.
            metadata = None
        if metadata:
            # Translate non-string band references (e.g. indices) to band names.
            bands = [b if isinstance(b, str) else metadata.band_dimension.band_name(b) for b in bands]
            # TODO: also apply spatial/temporal filters to metadata?
            metadata = metadata.filter_bands(bands)
        arguments['bands'] = bands

    # Normalize `properties` to a fresh dictionary (property name -> predicate).
    if properties is None:
        properties = {}
    elif isinstance(properties, CollectionProperty):
        properties = {properties.name: properties.from_node()}
    elif isinstance(properties, list):
        # TODO: warn about items that are not CollectionProperty objects instead of silently dropping them.
        properties = {p.name: p.from_node() for p in properties if isinstance(p, CollectionProperty)}
    else:
        # Work on a copy: the "eo:cloud_cover" injection below should not leak into the caller's dictionary.
        properties = dict(properties)
    if max_cloud_cover is not None:
        # Explicit `None` check (instead of truthiness) so that a limit of 0 is honored too.
        properties["eo:cloud_cover"] = lambda v: v <= max_cloud_cover
    if properties:
        summaries = metadata and metadata.get("summaries") or {}
        undefined_properties = set(properties.keys()).difference(summaries.keys())
        if undefined_properties:
            # Sorted to get a deterministic warning message.
            warnings.warn(
                f"{collection_id} property filtering with properties that are undefined "
                f"in the collection metadata (summaries): {', '.join(sorted(undefined_properties))}.",
                stacklevel=2,
            )
        arguments["properties"] = {
            prop: build_child_callback(pred, parent_parameters=["value"]) for prop, pred in properties.items()
        }

    pg = PGNode(
        process_id='load_collection',
        arguments=arguments
    )
    return cls(graph=pg, connection=connection, metadata=metadata)
+ + + create_collection = legacy_alias( + load_collection, name="create_collection", since="0.4.6" + ) + +
@classmethod
@deprecated(reason="Depends on non-standard process, replace with :py:meth:`openeo.rest.connection.Connection.load_stac` where possible.",version="0.25.0")
def load_disk_collection(cls, connection: Connection, file_format: str, glob_pattern: str, **options) -> DataCube:
    """
    Load image data from disk as a DataCube.
    This relies on a non-standard process ('load_disk_data'), which will eventually be replaced
    by standard options such as :py:meth:`openeo.rest.connection.Connection.load_stac`
    or https://processes.openeo.org/#load_uploaded_files

    :param connection: The connection to use to connect with the backend.
    :param file_format: the file format, e.g. 'GTiff'
    :param glob_pattern: a glob pattern that matches the files to load from disk
    :param options: options specific to the file format
    :return: the data as a DataCube
    """
    load_arguments = {
        'format': file_format,
        'glob_pattern': glob_pattern,
        'options': options,
    }
    node = PGNode(process_id='load_disk_data', arguments=load_arguments)
    return cls(graph=node, connection=connection)
+ + +
@classmethod
def load_stac(
    cls,
    url: str,
    spatial_extent: Union[Dict[str, float], Parameter, None] = None,
    temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
    bands: Optional[List[str]] = None,
    properties: Optional[Dict[str, Union[str, PGNode, Callable]]] = None,
    connection: Optional[Connection] = None,
) -> DataCube:
    """
    Loads data from a static STAC catalog or a STAC API Collection and returns the data as a processable :py:class:`DataCube`.
    A batch job result can be loaded by providing a reference to it.

    If supported by the underlying metadata and file format, the data that is added to the data cube can be
    restricted with the parameters ``spatial_extent``, ``temporal_extent`` and ``bands``.
    If no data is available for the given extents, a ``NoDataAvailable`` error is thrown.

    Remarks:

    * The bands (and all dimensions that specify nominal dimension labels) are expected to be ordered as
      specified in the metadata if the ``bands`` parameter is set to ``null``.
    * If no additional parameter is specified this would imply that the whole data set is expected to be loaded.
      Due to the large size of many data sets, this is not recommended and may be optimized by back-ends to only
      load the data that is actually required after evaluating subsequent processes such as filters.
      This means that the values should be processed only after the data has been limited to the required extent
      and as a consequence also to a manageable size.


    :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog)
        or a specific STAC API Collection that allows to filter items and to download assets.
        This includes batch job results, which itself are compliant to STAC.
        For external URLs, authentication details such as API keys or tokens may need to be included in the URL.

        Batch job results can be specified in two ways:

        - For Batch job results at the same back-end, a URL pointing to the corresponding batch job results
          endpoint should be provided. The URL usually ends with ``/jobs/{id}/results`` and ``{id}``
          is the corresponding batch job ID.
        - For external results, a signed URL must be provided. Not all back-ends support signed URLs,
          which are provided as a link with the link relation `canonical` in the batch job result metadata.
    :param spatial_extent:
        Limits the data to load to the specified bounding box or polygons.

        For raster data, the process loads the pixel into the data cube if the point at the pixel center intersects
        with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).

        For vector data, the process loads the geometry into the data cube if the geometry is fully within the
        bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).
        Empty geometries may only be in the data cube if no spatial extent has been provided.

        The GeoJSON can be one of the following feature types:

        * A ``Polygon`` or ``MultiPolygon`` geometry,
        * a ``Feature`` with a ``Polygon`` or ``MultiPolygon`` geometry, or
        * a ``FeatureCollection`` containing at least one ``Feature`` with ``Polygon`` or ``MultiPolygon`` geometries.

        Set this parameter to ``None`` to set no limit for the spatial extent.
        Be careful with this when loading large datasets. It is recommended to use this parameter instead of
        using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data.

    :param temporal_extent:
        Limits the data to load to the specified left-closed temporal interval.
        Applies to all temporal dimensions.
        The interval has to be specified as an array with exactly two elements:

        1.  The first element is the start of the temporal interval.
            The specified instance in time is **included** in the interval.
        2.  The second element is the end of the temporal interval.
            The specified instance in time is **excluded** from the interval.

        The second element must always be greater/later than the first element.
        Otherwise, a `TemporalExtentEmpty` exception is thrown.

        Also supports open intervals by setting one of the boundaries to ``None``, but never both.

        Set this parameter to ``None`` to set no limit for the temporal extent.
        Be careful with this when loading large datasets. It is recommended to use this parameter instead of
        using ``filter_temporal()`` directly after loading unbounded data.

    :param bands:
        Only adds the specified bands into the data cube so that bands that don't match the list
        of band names are not available. Applies to all dimensions of type `bands`.

        Either the unique band name (metadata field ``name`` in bands) or one of the common band names
        (metadata field ``common_name`` in bands) can be specified.
        If the unique band name and the common name conflict, the unique band name has a higher priority.

        The order of the specified array defines the order of the bands in the data cube.
        If multiple bands match a common name, all matched bands are included in the original order.

        It is recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded data.

    :param properties:
        Limits the data by metadata properties to include only data in the data cube which
        all given conditions return ``True`` for (AND operation).

        Specify key-value-pairs with the key being the name of the metadata property,
        which can be retrieved with the openEO Data Discovery for Collections.
        The value must be a condition (user-defined process) to be evaluated against a STAC API.
        This parameter is not supported for static STAC.

    :param connection: The connection to use to connect with the backend.

    :return: new DataCube for the ``load_stac`` process.
        Its metadata is ``None`` when metadata extraction from the STAC URL failed.

    .. versionadded:: 0.33.0

    """
    # Only extents/filters that are actually given (truthy) end up as process arguments.
    load_arguments = {"url": url}
    # TODO #425 more normalization/validation of extent/band parameters
    if spatial_extent:
        load_arguments["spatial_extent"] = spatial_extent
    if temporal_extent:
        normalized_extent = DataCube._get_temporal_extent(extent=temporal_extent)
        load_arguments["temporal_extent"] = normalized_extent
    if bands:
        load_arguments["bands"] = bands
    if properties:
        property_filters = {}
        for name, predicate in properties.items():
            property_filters[name] = build_child_callback(predicate, parent_parameters=["value"])
        load_arguments["properties"] = property_filters
    node = PGNode("load_stac", arguments=load_arguments)

    # Metadata extraction is best effort: on failure, just continue without metadata.
    try:
        cube_metadata = metadata_from_stac(url)
        if bands:
            # TODO: also apply spatial/temporal filters to metadata?
            cube_metadata = cube_metadata.filter_bands(band_names=bands)
    except Exception:
        log.warning(f"Failed to extract cube metadata from STAC URL {url}", exc_info=True)
        cube_metadata = None
    return cls(graph=node, connection=connection, metadata=cube_metadata)
@classmethod
def _get_temporal_extent(
    cls,
    *args,
    start_date: InputDate = None,
    end_date: InputDate = None,
    extent: Union[Sequence[InputDate], Parameter, str, None] = None,
) -> Union[List[Union[str, Parameter, PGNode, None]], Parameter]:
    """
    Normalize a temporal extent specification, with awareness of parameters.

    A :py:class:`Parameter` that is given as the single positional argument
    or as ``extent`` is returned untouched.
    Every other input is handed to ``get_temporal_extent`` and returned as a list.

    :param args: positional form of the temporal extent
    :param start_date: start of the interval
    :param end_date: end of the interval
    :param extent: temporal extent as a whole (or a parameter standing in for it)
    :return: the parameter itself, or a list with the normalized interval boundaries
    """
    # TODO: move this outside of DataCube class
    # TODO: return extent as tuple instead of list
    if len(args) == 1 and isinstance(args[0], Parameter):
        # A positional parameter can not be combined with any keyword argument.
        assert start_date is None and end_date is None and extent is None
        return args[0]
    if len(args) == 0 and isinstance(extent, Parameter):
        assert start_date is None and end_date is None
        # TODO: warn about unexpected parameter schema
        return extent

    def _normalize_boundary(boundary: Any) -> Any:
        # TODO: can this be generalized through _FromNodeMixin?
        if isinstance(boundary, (Parameter, PGNode)):
            # Process graph constructs are kept as-is.
            # TODO: warn about unexpected parameter schema
            return boundary
        if isinstance(boundary, ProcessBuilderBase):
            return boundary.pgnode
        return rfc3339.normalize(boundary)

    boundaries = get_temporal_extent(
        *args,
        start_date=start_date,
        end_date=end_date,
        extent=extent,
        convertor=_normalize_boundary,
    )
    return list(boundaries)
@openeo_process
def filter_temporal(
    self,
    *args,
    start_date: InputDate = None,
    end_date: InputDate = None,
    extent: Union[Sequence[InputDate], Parameter, str, None] = None,
) -> DataCube:
    """
    Limit the DataCube to a certain date range, which can be specified in several ways:

    >>> cube.filter_temporal("2019-07-01", "2019-08-01")
    >>> cube.filter_temporal(["2019-07-01", "2019-08-01"])
    >>> cube.filter_temporal(extent=["2019-07-01", "2019-08-01"])
    >>> cube.filter_temporal(start_date="2019-07-01", end_date="2019-08-01")

    See :ref:`filtering-on-temporal-extent-section` for more details on temporal extent handling and shorthand notation.

    :param args: positional form of the temporal extent (start and end date, or a single list/tuple).
        A single positional string is rejected because it is ambiguous.
    :param start_date: start date of the filter (inclusive), as a string or date object
    :param end_date: end date of the filter (exclusive), as a string or date object
    :param extent: temporal extent.
        Typically, specified as a two-item list or tuple containing start and end date.
    :return: a DataCube with the ``filter_temporal`` process applied
    :raises OpenEoClientException: when called with a single positional string argument

    .. versionchanged:: 0.23.0
        Arguments ``start_date``, ``end_date`` and ``extent``:
        add support for year/month shorthand notation as discussed at :ref:`date-shorthand-handling`.
    """
    if len(args) == 1 and isinstance(args[0], str):
        # `filter_temporal("2019")` could mean "from 2019 on" or "all of 2019": refuse to guess.
        raise OpenEoClientException(
            f"filter_temporal() with a single string argument ({args[0]!r}) is ambiguous."
            f" If you want a half-unbounded interval, use something like filter_temporal({args[0]!r}, None) or use explicit keyword arguments."
            f" If you want the full interval covering all of {args[0]!r}, use something like filter_temporal(extent={args[0]!r})."
        )
    return self.process(
        process_id='filter_temporal',
        arguments={
            'data': THIS,
            'extent': self._get_temporal_extent(*args, start_date=start_date, end_date=end_date, extent=extent)
        }
    )
+ + +
@openeo_process
def filter_bbox(
    self,
    *args,
    west: Optional[float] = None,
    south: Optional[float] = None,
    east: Optional[float] = None,
    north: Optional[float] = None,
    crs: Optional[Union[int, str]] = None,
    base: Optional[float] = None,
    height: Optional[float] = None,
    bbox: Union[Sequence[float], Parameter, None] = None,
) -> DataCube:
    """
    Limits the data cube to the specified bounding box.

    The bounding box can be specified in multiple ways.

    -   With keyword arguments::

            >>> cube.filter_bbox(west=3, south=51, east=4, north=52, crs=4326)

    -   With a (west, south, east, north) list or tuple
        (note that EPSG:4326 is the default CRS, so it's not necessary to specify it explicitly)::

            >>> cube.filter_bbox([3, 51, 4, 52])
            >>> cube.filter_bbox(bbox=[3, 51, 4, 52])

    -   With a bbox dictionary::

            >>> bbox = {"west": 3, "south": 51, "east": 4, "north": 52, "crs": 4326}
            >>> cube.filter_bbox(bbox)
            >>> cube.filter_bbox(bbox=bbox)
            >>> cube.filter_bbox(**bbox)

    -   With a shapely geometry (of which the bounding box will be used)::

            >>> cube.filter_bbox(geometry)
            >>> cube.filter_bbox(bbox=geometry)

    -   Passing a parameter::

            >>> bbox_param = Parameter(name="my_bbox", schema="object")
            >>> cube.filter_bbox(bbox_param)
            >>> cube.filter_bbox(bbox=bbox_param)

    -   With a CRS other than EPSG 4326::

            >>> cube.filter_bbox(
            ...     west=652000, east=672000, south=5161000, north=5181000,
            ...     crs=32632
            ... )

    -   Deprecated: positional arguments are also supported,
        but follow a non-standard order for legacy reasons::

            >>> west, east, north, south = 3, 4, 52, 51
            >>> cube.filter_bbox(west, east, north, south)

    :param west: west boundary of the bounding box
    :param south: south boundary of the bounding box
    :param east: east boundary of the bounding box
    :param north: north boundary of the bounding box
    :param crs: value describing the coordinate reference system.
        Typically just an int (interpreted as EPSG code, e.g. ``4326``)
        or a string (handled as authority string, e.g. ``"EPSG:4326"``).
        See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument.
    :param base: optional ``base`` field of the extent (only included when not ``None``)
    :param height: optional ``height`` field of the extent (only included when not ``None``)
    :param bbox: the bounding box as a whole: a 4-item list/tuple, a dictionary,
        a shapely geometry or a :py:class:`Parameter`
    :return: a DataCube with the ``filter_bbox`` process applied
    :raises ValueError: when positional and keyword arguments are mixed,
        when ``bbox`` is combined with ``west``/``south``/``east``/``north``,
        or when the given arguments can not be interpreted as a bounding box
    """
    if args and any(k is not None for k in (west, south, east, north, bbox)):
        raise ValueError("Don't mix positional arguments with keyword arguments.")
    if bbox and any(k is not None for k in (west, south, east, north)):
        raise ValueError("Don't mix `bbox` with `west`/`south`/`east`/`north` keyword arguments.")

    if args:
        if 4 <= len(args) <= 5:
            # Handle old-style west-east-north-south order
            # TODO remove handling of this legacy order?
            warnings.warn("Deprecated argument order usage: `filter_bbox(west, east, north, south)`."
                          " Use keyword arguments or tuple/list argument instead.")
            west, east, north, south = args[:4]
            if len(args) > 4:
                # Optional fifth legacy positional argument is the CRS.
                crs = normalize_crs(args[4])
        elif len(args) == 1 and (isinstance(args[0], (list, tuple)) and len(args[0]) == 4
                                 or isinstance(args[0], (dict, shapely.geometry.base.BaseGeometry, Parameter))):
            # Single positional argument: handle it like the `bbox` keyword argument.
            bbox = args[0]
        else:
            raise ValueError(args)

    if isinstance(bbox, Parameter):
        # A parameter is passed through as the extent; only its schema is sanity checked.
        if not schema_supports(bbox.schema, type="object"):
            warnings.warn(
                "Unexpected parameterized `extent` in `filter_bbox`:"
                f" expected schema compatible with type 'object' but got {bbox.schema!r}."
            )
        extent = bbox
    else:
        if bbox:
            if isinstance(bbox, shapely.geometry.base.BaseGeometry):
                west, south, east, north = bbox.bounds
            elif isinstance(bbox, (list, tuple)) and len(bbox) == 4:
                west, south, east, north = bbox[:4]
            elif isinstance(bbox, dict):
                west, south, east, north = (bbox[k] for k in ["west", "south", "east", "north"])
                if "crs" in bbox:
                    crs = bbox["crs"]
            else:
                raise ValueError(bbox)

        extent = {'west': west, 'east': east, 'north': north, 'south': south}
        # Optional fields are only added when actually specified.
        extent.update(dict_no_none(crs=crs, base=base, height=height))

    return self.process(
        process_id='filter_bbox',
        arguments={
            'data': THIS,
            'extent': extent
        }
    )
+ + +
@openeo_process
def filter_spatial(
    self, geometries: Union[shapely.geometry.base.BaseGeometry, dict, str, pathlib.Path, Parameter, VectorCube]
) -> DataCube:
    """
    Limits the data cube over the spatial dimensions to the specified geometries.

    - For polygons, the filter retains a pixel in the data cube if the point at the pixel center intersects with
      at least one of the polygons (as defined in the Simple Features standard by the OGC).
    - For points, the process considers the closest pixel center.
    - For lines (line strings), the process considers all the pixels whose centers are closest to at least one
      point on the line.

    More specifically, pixels outside of the bounding box of the given geometry will not be available after filtering.
    All pixels inside the bounding box that are not retained will be set to null (no data).

    :param geometries: One or more geometries used for filtering. Can be provided in different ways:

        - a shapely geometry
        - a GeoJSON-style dictionary,
        - a public URL to the geometries in a vector format that is supported by the backend
          (also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
          e.g. GeoJSON, GeoParquet, etc.
          A ``load_url`` process will automatically be added to the process graph.
        - a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
          which will be loaded automatically to get the geometries as GeoJSON construct.
        - a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
        - a :py:class:`~openeo.api.process.Parameter` instance.

    :return: A data cube restricted to the specified geometries. The dimensions and dimension properties (name,
        type, labels, reference system and resolution) remain unchanged, except that the spatial dimensions have less
        (or the same) dimension labels.

    .. versionchanged:: 0.36.0
        Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.

    .. versionchanged:: 0.36.0
        Support for passing a backend-side path as ``geometries`` argument was removed
        (also see :ref:`legacy_read_vector`).
        Instead, it's possible to provide a client-side path to a GeoJSON file
        (which will be loaded client-side to get the geometries as GeoJSON construct).
    """
    # NOTE(review): unlike `aggregate_spatial`, "Feature" is not accepted here; confirm that this is intentional.
    accepted_types = [
        "Point",
        "MultiPoint",
        "LineString",
        "MultiLineString",
        "Polygon",
        "MultiPolygon",
        "GeometryCollection",
        "FeatureCollection",
    ]
    normalized = self._get_geometry_argument(geometries, valid_geojson_types=accepted_types, crs=None)
    process_arguments = {"data": THIS, "geometries": normalized}
    return self.process(process_id="filter_spatial", arguments=process_arguments)
+ + +
@openeo_process
def filter_bands(self, bands: Union[List[Union[str, int]], str]) -> DataCube:
    """
    Filter the data cube by the given bands.

    When metadata normalization is enabled, the given bands are first resolved
    to band names through the band dimension metadata.

    :param bands: list of band names, common names or band indices. Single band name can also be given as string.
    :return: a DataCube instance (with metadata filtered accordingly, when metadata is available)
    """
    # A single band name is shorthand for a one-item list.
    selection = [bands] if isinstance(bands, str) else bands
    if self._do_metadata_normalization():
        band_dimension = self.metadata.band_dimension
        selection = [band_dimension.band_name(b) for b in selection]

    if self.metadata:
        filtered_metadata = self.metadata.filter_bands(selection)
    else:
        filtered_metadata = None

    return self.process(
        process_id="filter_bands",
        arguments={"data": THIS, "bands": selection},
        metadata=filtered_metadata,
    )
+ + +
@openeo_process
def filter_labels(
    self, condition: Union[PGNode, Callable], dimension: str, context: Optional[dict] = None
) -> DataCube:
    """
    Filters the dimension labels in the data cube for the given dimension.
    Only the dimension labels that match the specified condition are preserved,
    all other labels with their corresponding data get removed.

    :param condition: the "child callback" which will be given a single label value (number or string)
        and returns a boolean expressing if the label should be preserved.
        Also see :ref:`callbackfunctions`.
    :param dimension: The name of the dimension to filter on.
    :param context: optional ``context`` argument of the process (left out when ``None``).
    :return: a DataCube with the ``filter_labels`` process applied

    .. versionadded:: 0.27.0
    """
    # The callback receives the label through its "value" parameter.
    label_predicate = build_child_callback(condition, parent_parameters=["value"])
    process_arguments = dict_no_none(
        data=THIS,
        condition=label_predicate,
        dimension=dimension,
        context=context,
    )
    return self.process(process_id="filter_labels", arguments=process_arguments)
# Legacy name ``band_filter`` for ``filter_bands``,
# registered through ``legacy_alias`` (flagged as legacy since version 0.1.0).
band_filter = legacy_alias(filter_bands, "band_filter", since="0.1.0")
def band(self, band: Union[str, int]) -> DataCube:
    """
    Filter out a single band.

    Implemented as a reduction of the bands dimension with an ``array_element`` process,
    which selects by ``index`` for an integer and by ``label`` otherwise.
    When metadata normalization is enabled, the given band is first resolved to its band index.

    :param band: band name, band common name or band index.
    :return: a DataCube instance
    """
    if self._do_metadata_normalization():
        band = self.metadata.band_dimension.band_index(band)

    # Integers select by position, anything else by label.
    selector = "index" if isinstance(band, int) else "label"
    element_arguments = {"data": {"from_parameter": "data"}, selector: band}
    reducer = PGNode(process_id="array_element", arguments=element_arguments)
    return self.reduce_bands(reducer=reducer)
+ + +
@openeo_process
def resample_spatial(
    self,
    resolution: Union[float, Tuple[float, float]],
    projection: Optional[Union[int, str]] = None,
    method: str = 'near',
    align: str = 'upper-left',
) -> DataCube:
    """
    Resample the spatial dimensions of the data cube with the ``resample_spatial`` process.

    All arguments are passed to the process as given
    (including ``projection`` when it is ``None``).

    :param resolution: target resolution, as a single value or as a pair of values.
    :param projection: target projection, as an int or a string.
        Defaults to ``None``.
    :param method: resampling method to use (``"near"`` by default).
    :param align: corner of the spatial extent to align the resampled data to
        (``"upper-left"`` by default).
    :return: a DataCube with the ``resample_spatial`` process applied
    """
    return self.process('resample_spatial', {
        'data': THIS,
        'resolution': resolution,
        'projection': projection,
        'method': method,
        'align': align
    })
+ + +
def resample_cube_spatial(self, target: DataCube, method: str = "near") -> DataCube:
    """
    Resamples the spatial dimensions (x,y) from a source data cube to align with the corresponding
    dimensions of the given target data cube.
    Returns a new data cube with the resampled dimensions.

    To resample a data cube to a specific resolution or projection regardless of an existing target
    data cube, refer to :py:meth:`resample_spatial`.

    :param target: A data cube that describes the spatial target resolution.
    :param method: Resampling method to use.
    :return: a data cube with the ``resample_cube_spatial`` process applied
    """
    process_arguments = {
        "data": self,
        "target": target,
        "method": method,
    }
    return self.process(process_id="resample_cube_spatial", arguments=process_arguments)
+ + +
@openeo_process
def resample_cube_temporal(
    self, target: DataCube, dimension: Optional[str] = None, valid_within: Optional[int] = None
) -> DataCube:
    """
    Resamples one or more given temporal dimensions from a source data cube to align with the corresponding
    dimensions of the given target data cube using the nearest neighbor method.
    Returns a new data cube with the resampled dimensions.

    By default, this process simply takes the nearest neighbor independent of the value (including values such as
    no-data / ``null``). Depending on the data cubes this may lead to values being assigned to two target timestamps.
    To only consider valid values in a specific range around the target timestamps, use the parameter ``valid_within``.

    The rare case of ties is resolved by choosing the earlier timestamps.

    :param target: A data cube that describes the temporal target resolution.
    :param dimension: The name of the temporal dimension to resample.
    :param valid_within: range around the target timestamps in which to look for valid values
        (see above; left out of the process arguments when ``None``).
    :return: a data cube with the ``resample_cube_temporal`` process applied

    .. versionadded:: 0.10.0
    """
    # Optional arguments that are not set are left out of the process graph.
    process_arguments = dict_no_none(
        data=self,
        target=target,
        dimension=dimension,
        valid_within=valid_within,
    )
    return self.process("resample_cube_temporal", process_arguments)
def _operator_binary(self, operator: str, other: Union[DataCube, int, float], reverse=False) -> DataCube:
    """
    Generic handling of a (mathematical) binary operator.

    Dispatches on "band math" mode and on the type of ``other``:

    - band math mode: scalar or cube variant of the band math operator
    - otherwise: merge of two cubes, or "``apply`` math" with a scalar

    :param operator: process id of the operator
    :param other: right hand side: another data cube or a number
    :param reverse: swap "self" and "other" (only used in combination with a scalar ``other``)
    :raises OperatorException: when the type of ``other`` is not supported
    """
    band_math_mode = self._in_bandmath_mode()
    other_is_scalar = isinstance(other, (int, float))

    if band_math_mode and other_is_scalar:
        return self._bandmath_operator_binary_scalar(operator, other, reverse=reverse)
    if band_math_mode and isinstance(other, DataCube):
        return self._bandmath_operator_binary_cubes(operator, other)
    if not band_math_mode and isinstance(other, DataCube):
        return self._merge_operator_binary_cubes(operator, other)
    if not band_math_mode and other_is_scalar:
        # "`apply` math" mode
        return self._apply_operator(operator=operator, other=other, reverse=reverse)

    raise OperatorException(
        f"Unsupported operator {operator!r} with `other` type {type(other)!r} (band math mode={band_math_mode})"
    )


def _operator_unary(self, operator: str, **kwargs) -> DataCube:
    """
    Generic handling of a unary operator, in band math mode or through ``apply``.

    :param operator: process id of the operator
    :param kwargs: additional arguments for the operator process
    """
    if self._in_bandmath_mode():
        return self._bandmath_operator_unary(operator, **kwargs)
    return self._apply_operator(operator=operator, extra_arguments=kwargs)


def _apply_operator(
    self,
    operator: str,
    other: Optional[Union[int, float]] = None,
    reverse: Optional[bool] = None,
    extra_arguments: Optional[dict] = None,
) -> DataCube:
    """
    Apply a unary or binary operator/process,
    by appending to existing `apply` node, or starting a new one.

    :param operator: process id of operator
    :param other: for binary operators: "other" argument
    :param reverse: for binary operators: "self" and "other" should be swapped (reflected operator mode)
    :param extra_arguments: additional arguments to pass to the operator process
    """
    current = self.result_node()
    if current.process_id == "apply":
        # Append to existing `apply` node: chain on its child process graph.
        data = current.arguments["data"]
        x = {"from_node": current.arguments["process"]["process_graph"]}
        context = current.arguments.get("context")
    else:
        # Start new `apply` node.
        data = self
        x = {"from_parameter": "x"}
        context = None

    # Build args for child callback.
    callback_args = {"x": x}
    callback_args.update(extra_arguments or {})
    if other is not None:
        # Binary operator mode
        callback_args["y"] = other
        if reverse:
            callback_args["x"], callback_args["y"] = callback_args["y"], callback_args["x"]

    callback = PGNode(process_id=operator, arguments=callback_args)
    apply_node = PGNode(
        process_id="apply",
        arguments=dict_no_none(
            data=data,
            process={"process_graph": callback},
            context=context,
        ),
    )
    return self.process_with_node(apply_node)
@openeo_process(mode="operator")
def add(self, other: Union[DataCube, int, float], reverse=False) -> DataCube:
    """
    Add another data cube or a number to this data cube.

    :param other: another data cube or a number
    :param reverse: swap the operands (reflected operator mode)
    """
    return self._operator_binary(operator="add", other=other, reverse=reverse)
+ + +
@openeo_process(mode="operator")
def subtract(self, other: Union[DataCube, int, float], reverse=False) -> DataCube:
    """
    Subtract another data cube or a number from this data cube.

    :param other: another data cube or a number
    :param reverse: swap the operands (reflected operator mode)
    """
    return self._operator_binary(operator="subtract", other=other, reverse=reverse)
+ + +
@openeo_process(mode="operator")
def divide(self, other: Union[DataCube, int, float], reverse=False) -> DataCube:
    """
    Divide this data cube by another data cube or a number.

    :param other: another data cube or a number
    :param reverse: swap the operands (reflected operator mode)
    """
    return self._operator_binary(operator="divide", other=other, reverse=reverse)
+ + +
@openeo_process(mode="operator")
def multiply(self, other: Union[DataCube, int, float], reverse=False) -> DataCube:
    """
    Multiply this data cube with another data cube or a number.

    :param other: another data cube or a number
    :param reverse: swap the operands (reflected operator mode)
    """
    return self._operator_binary(operator="multiply", other=other, reverse=reverse)
+ + +
@openeo_process
def normalized_difference(self, other: DataCube) -> DataCube:
    """
    Normalized difference of this data cube and another one, in band math mode.

    This DataCube method is only a convenience function when in band math mode:
    both data cubes have to be in band math mode.

    :param other: the other data cube (in band math mode)
    :return: a DataCube with the ``normalized_difference`` operator applied
    :raises BandMathException: when one of the data cubes is not in band math mode
    """
    # Explicit check instead of `assert`: assertions are stripped when running with `-O`,
    # after which a cube that is not in band math mode would silently take the cube merging path.
    if not self._in_bandmath_mode() or not other._in_bandmath_mode():
        raise BandMathException("normalized_difference() requires both data cubes to be in band math mode.")
    return self._operator_binary("normalized_difference", other)
+ + +
@openeo_process(process_id="or", mode="operator")
def logical_or(self, other: DataCube) -> DataCube:
    """
    Apply element-wise logical `or` operation

    :param other: the other operand
    :return: logical_or(this, other)
    """
    return self._operator_binary(operator="or", other=other)
+ + +
@openeo_process(process_id="and", mode="operator")
def logical_and(self, other: DataCube) -> DataCube:
    """
    Apply element-wise logical `and` operation

    :param other: the other operand
    :return: logical_and(this, other)
    """
    return self._operator_binary(operator="and", other=other)
@openeo_process(process_id="not", mode="operator")
def __invert__(self) -> DataCube:
    """Operator ``~cube``: apply the ``not`` process."""
    return self._operator_unary("not")


@openeo_process(process_id="neq", mode="operator")
def __ne__(self, other: Union[DataCube, int, float]) -> DataCube:
    """Operator ``cube != other``: apply the ``neq`` process."""
    return self._operator_binary("neq", other)


@openeo_process(process_id="eq", mode="operator")
def __eq__(self, other: Union[DataCube, int, float]) -> DataCube:
    """
    Pixelwise comparison of this data cube with another cube or constant.

    :param other: Another data cube, or a constant
    :return: a DataCube with the ``eq`` operator applied
    """
    return self._operator_binary("eq", other)


@openeo_process(process_id="gt", mode="operator")
def __gt__(self, other: Union[DataCube, int, float]) -> DataCube:
    """
    Pairwise comparison of the bands in this data cube with the bands in the 'other' data cube.

    :param other: another data cube or a number
    :return: this > other
    """
    return self._operator_binary("gt", other)


# Registered under "gte": that is the process id that is actually emitted.
@openeo_process(process_id="gte", mode="operator")
def __ge__(self, other: Union[DataCube, int, float]) -> DataCube:
    """
    Operator ``cube >= other``: apply the ``gte`` process.

    :param other: another data cube or a number
    :return: this >= other
    """
    return self._operator_binary("gte", other)


@openeo_process(process_id="lt", mode="operator")
def __lt__(self, other: Union[DataCube, int, float]) -> DataCube:
    """
    Pairwise comparison of the bands in this data cube with the bands in the 'other' data cube.
    The number of bands in both data cubes has to be the same.

    :param other: another data cube or a number
    :return: this < other
    """
    return self._operator_binary("lt", other)


# Registered under "lte": that is the process id that is actually emitted.
@openeo_process(process_id="lte", mode="operator")
def __le__(self, other: Union[DataCube, int, float]) -> DataCube:
    """
    Operator ``cube <= other``: apply the ``lte`` process.

    :param other: another data cube or a number
    :return: this <= other
    """
    return self._operator_binary("lte", other)


@openeo_process(process_id="add", mode="operator")
def __add__(self, other) -> DataCube:
    """Operator ``cube + other``."""
    return self.add(other)


@openeo_process(process_id="add", mode="operator")
def __radd__(self, other) -> DataCube:
    """Reflected operator ``other + cube``."""
    return self.add(other, reverse=True)


@openeo_process(process_id="subtract", mode="operator")
def __sub__(self, other) -> DataCube:
    """Operator ``cube - other``."""
    return self.subtract(other)


@openeo_process(process_id="subtract", mode="operator")
def __rsub__(self, other) -> DataCube:
    """Reflected operator ``other - cube``."""
    return self.subtract(other, reverse=True)


@openeo_process(process_id="multiply", mode="operator")
def __neg__(self) -> DataCube:
    """Operator ``-cube``, implemented as multiplication with ``-1``."""
    return self.multiply(-1)


@openeo_process(process_id="multiply", mode="operator")
def __mul__(self, other) -> DataCube:
    """Operator ``cube * other``."""
    return self.multiply(other)


@openeo_process(process_id="multiply", mode="operator")
def __rmul__(self, other) -> DataCube:
    """Reflected operator ``other * cube``."""
    return self.multiply(other, reverse=True)


@openeo_process(process_id="divide", mode="operator")
def __truediv__(self, other) -> DataCube:
    """Operator ``cube / other``."""
    return self.divide(other)


@openeo_process(process_id="divide", mode="operator")
def __rtruediv__(self, other) -> DataCube:
    """Reflected operator ``other / cube``."""
    return self.divide(other, reverse=True)


@openeo_process(process_id="power", mode="operator")
def __rpow__(self, other) -> DataCube:
    """Reflected operator ``other ** cube``."""
    return self._power(other, reverse=True)


@openeo_process(process_id="power", mode="operator")
def __pow__(self, other) -> DataCube:
    """Operator ``cube ** other``."""
    return self._power(other, reverse=False)


def _power(self, other, reverse=False):
    """
    Build a ``power`` process on top of the current band math reducer.

    :param other: the other operand: the exponent ``p``, or the ``base`` when ``reverse`` is set
    :param reverse: swap ``base`` and ``p`` (reflected operator mode)
    :raises BandMathException: when the data cube is not in band math mode
    """
    node = self._get_bandmath_node()
    x = node.reducer_process_graph()
    y = other
    if reverse:
        x, y = y, x
    return self.process_with_node(node.clone_with_new_reducer(
        PGNode(process_id="power", base=x, p=y)
    ))
@openeo_process(process_id="power", mode="operator")
def power(self, p: float):
    """
    Raise this data cube to the given power.

    :param p: the exponent
    """
    exponent = p
    return self._power(other=exponent, reverse=False)
+ + +
@openeo_process(process_id="ln", mode="operator")
def ln(self) -> DataCube:
    """Apply the ``ln`` process as unary operator."""
    return self._operator_unary(operator="ln")
+ + +
@openeo_process(process_id="log", mode="operator")
def logarithm(self, base: float) -> DataCube:
    """
    Apply the ``log`` process as unary operator, with the given base.

    :param base: value for the ``base`` argument of the ``log`` process
    """
    return self._operator_unary(operator="log", base=base)
+ + +
@openeo_process(process_id="log", mode="operator")
def log2(self) -> DataCube:
    """Shorthand for :py:meth:`logarithm` with base 2."""
    base = 2
    return self.logarithm(base=base)
+ + +
@openeo_process(process_id="log", mode="operator")
def log10(self) -> DataCube:
    """Shorthand for :py:meth:`logarithm` with base 10."""
    base = 10
    return self.logarithm(base=base)
@openeo_process(process_id="or", mode="operator")
def __or__(self, other) -> DataCube:
    """Operator ``cube | other``: element-wise logical `or`."""
    return self.logical_or(other)


@openeo_process(process_id="and", mode="operator")
def __and__(self, other) -> DataCube:
    """Operator ``cube & other``: element-wise logical `and`."""
    return self.logical_and(other)


def _bandmath_operator_binary_cubes(
    self, operator, other: DataCube, left_arg_name="x", right_arg_name="y"
) -> DataCube:
    """
    Band math binary operator with cube as right hand side argument

    :param operator: process id of the operator
    :param other: the right hand side data cube (in band math mode, on the same source data)
    :param left_arg_name: name of the operator argument for the left hand side
    :param right_arg_name: name of the operator argument for the right hand side
    :raises BandMathException: when a cube is not in band math mode,
        or when both cubes do not originate from the same ``data``
    """
    left = self._get_bandmath_node()
    right = other._get_bandmath_node()
    if left.arguments["data"] != right.arguments["data"]:
        raise BandMathException("'Band math' between bands of different data cubes is not supported yet.")

    # Build reducer's sub-processgraph
    merged = PGNode(
        process_id=operator,
        arguments={
            left_arg_name: {"from_node": left.reducer_process_graph()},
            right_arg_name: {"from_node": right.reducer_process_graph()},
        },
    )
    return self.process_with_node(left.clone_with_new_reducer(merged))


def _bandmath_operator_binary_scalar(self, operator: str, other: Union[int, float], reverse=False) -> DataCube:
    """
    Band math binary operator with scalar value (int or float) as right hand side argument

    :param operator: process id of the operator
    :param other: the scalar operand
    :param reverse: swap the operands (reflected operator mode)
    """
    node = self._get_bandmath_node()
    x = {'from_node': node.reducer_process_graph()}
    y = other
    if reverse:
        x, y = y, x
    return self.process_with_node(node.clone_with_new_reducer(
        PGNode(operator, x=x, y=y)
    ))


def _bandmath_operator_unary(self, operator: str, **kwargs) -> DataCube:
    """
    Band math unary operator: wrap the current reducer in the given operator process.

    :param operator: process id of the operator
    :param kwargs: additional arguments for the operator process
    """
    node = self._get_bandmath_node()
    return self.process_with_node(node.clone_with_new_reducer(
        PGNode(operator, x={'from_node': node.reducer_process_graph()}, **kwargs)
    ))


def _in_bandmath_mode(self) -> bool:
    """So-called "band math" mode: current result node is reduce_dimension along "bands" dimension."""
    # TODO #123 is it (still) necessary to make "band" math a special case?
    return isinstance(self._pg, ReduceNode) and self._pg.band_math_mode


def _get_bandmath_node(self) -> ReduceNode:
    """
    Check we are in bandmath mode and return the node

    :raises BandMathException: when the data cube is not in band math mode
    """
    if not self._in_bandmath_mode():
        raise BandMathException("Must be in band math mode already")
    return self._pg


def _merge_operator_binary_cubes(
    self, operator: str, other: DataCube, left_arg_name="x", right_arg_name="y"
) -> DataCube:
    """
    Merge two cubes with given operator as overlap_resolver.

    :param operator: process id of the operator
    :param other: the data cube to merge with
    :param left_arg_name: name of the operator argument that receives the overlap resolver's ``x``
    :param right_arg_name: name of the operator argument that receives the overlap resolver's ``y``
    """
    # TODO #123 reuse an existing merge_cubes process graph if it already exists?
    return self.merge_cubes(other, overlap_resolver=PGNode(
        process_id=operator,
        arguments={
            left_arg_name: {"from_parameter": "x"},
            right_arg_name: {"from_parameter": "y"},
        }
    ))


def _get_geometry_argument(
    self,
    argument: Union[
        shapely.geometry.base.BaseGeometry,
        dict,
        str,
        pathlib.Path,
        Parameter,
        _FromNodeMixin,
    ],
    valid_geojson_types: List[str],
    crs: Optional[Union[int, str]] = None,
) -> Union[dict, Parameter, PGNode]:
    """
    Convert input to a geometry as "geojson" subtype object or vectorcube.

    :param argument: the geometry: a parameter, a process graph construct, a URL,
        a path to a local JSON/GeoJSON file, a shapely geometry or a GeoJSON-style dictionary
    :param valid_geojson_types: the GeoJSON types (value of the ``"type"`` field) that are accepted
    :param crs: value that encodes a coordinate reference system.
        See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument.
    :return: the given parameter, a process graph node, or a GeoJSON-style dictionary.
        A dictionary provided by the caller is never modified in place.
    :raises OpenEoClientException: when the argument or its GeoJSON type is not supported
    """
    if isinstance(argument, Parameter):
        return argument
    elif isinstance(argument, _FromNodeMixin):
        return argument.from_node()

    if isinstance(argument, str) and re.match(r"^https?://", argument, flags=re.I):
        # Geometry provided as URL: load with `load_url` (with best-effort format guess)
        url = urllib.parse.urlparse(argument)
        suffix = pathlib.Path(url.path.lower()).suffix
        # Unknown suffixes are used as format name, without the leading dot.
        file_format = {
            ".json": "GeoJSON",
            ".geojson": "GeoJSON",
            ".pq": "Parquet",
            ".parquet": "Parquet",
            ".geoparquet": "Parquet",
        }.get(suffix, suffix.split(".")[-1])
        return self.connection.load_url(url=argument, format=file_format)

    if (
        isinstance(argument, (str, pathlib.Path))
        and pathlib.Path(argument).is_file()
        and pathlib.Path(argument).suffix.lower() in [".json", ".geojson"]
    ):
        geometry = load_json(argument)
    elif isinstance(argument, shapely.geometry.base.BaseGeometry):
        geometry = mapping(argument)
    elif isinstance(argument, dict):
        geometry = argument
    else:
        raise OpenEoClientException(f"Invalid geometry argument: {argument!r}")

    if geometry.get("type") not in valid_geojson_types:
        raise OpenEoClientException("Invalid geometry type {t!r}, must be one of {s}".format(
            t=geometry.get("type"), s=valid_geojson_types
        ))
    if crs:
        # TODO: don't warn when the crs is Lon-Lat like EPSG:4326?
        warnings.warn(f"Geometry with non-Lon-Lat CRS {crs!r} is only supported by specific back-ends.")
        # TODO #204 alternative for non-standard CRS in GeoJSON object?
        epsg_code = normalize_crs(crs)
        if epsg_code is not None:
            # proj did recognize the CRS
            crs_name = f"EPSG:{epsg_code}"
        else:
            # proj did not recognise this CRS
            warnings.warn(f"non-Lon-Lat CRS {crs!r} is not known to the proj library and might not be supported.")
            crs_name = crs
        # Add the CRS on a shallow copy, so that a dictionary owned by the caller is left untouched.
        geometry = {**geometry, "crs": {"type": "name", "properties": {"name": crs_name}}}
    return geometry
@openeo_process
def aggregate_spatial(
    self,
    geometries: Union[
        shapely.geometry.base.BaseGeometry,
        dict,
        str,
        pathlib.Path,
        Parameter,
        VectorCube,
    ],
    reducer: Union[str, typing.Callable, PGNode],
    target_dimension: Optional[str] = None,
    crs: Optional[Union[int, str]] = None,
    context: Optional[dict] = None,
) -> VectorCube:
    """
    Aggregates statistics for one or more geometries (e.g. zonal statistics for polygons)
    over the spatial dimensions.

    :param geometries: The geometries to aggregate in. Can be provided in different ways:

        - a shapely geometry
        - a GeoJSON-style dictionary,
        - a public URL to the geometries in a vector format that is supported by the backend
          (also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
          e.g. GeoJSON, GeoParquet, etc.
          A ``load_url`` process will automatically be added to the process graph.
        - a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
          which will be loaded automatically to get the geometries as GeoJSON construct.
        - a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
        - a :py:class:`~openeo.api.process.Parameter` instance.

    :param reducer: the "child callback":
        the name of a single openEO process,
        or a callback function as discussed in :ref:`callbackfunctions`,
        or a :py:class:`UDF <openeo.rest._datacube.UDF>` instance.

        The callback should correspond to a process that
        receives an array of numerical values
        and returns a single numerical value.
        For example:

        - ``"mean"`` (string)
        - :py:func:`max <openeo.processes.max>` (:ref:`predefined openEO process function <openeo_processes_functions>`)
        - ``lambda data: data.min()`` (function or lambda)

    :param target_dimension: The new dimension name to be used for storing the results.
    :param crs: The spatial reference system of the provided polygon.
        By default, longitude-latitude (EPSG:4326) is assumed.
        See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument.

        .. note:: this ``crs`` argument is a non-standard/experimental feature, only supported by specific back-ends.
            See https://github.com/Open-EO/openeo-processes/issues/235 for details.

    :param context: Additional data to be passed to the reducer process.
    :return: a :py:class:`~openeo.rest.vectorcube.VectorCube` with the aggregation result

    .. versionchanged:: 0.36.0
        Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.

    .. versionchanged:: 0.36.0
        Support for passing a backend-side path as ``geometries`` argument was removed
        (also see :ref:`legacy_read_vector`).
        Instead, it's possible to provide a client-side path to a GeoJSON file
        (which will be loaded client-side to get the geometries as GeoJSON construct).
    """
    valid_geojson_types = [
        "Point", "MultiPoint", "LineString", "MultiLineString",
        "Polygon", "MultiPolygon", "GeometryCollection", "Feature", "FeatureCollection"
    ]
    geometries = self._get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types, crs=crs)
    # The reducer callback receives the values to aggregate through its "data" parameter.
    reducer = build_child_callback(reducer, parent_parameters=["data"])
    return VectorCube(
        graph=self._build_pgnode(
            process_id="aggregate_spatial",
            data=THIS,
            geometries=geometries,
            reducer=reducer,
            arguments=dict_no_none(
                target_dimension=target_dimension, context=context
            ),
        ),
        connection=self._connection,
        # TODO: also add new "geometry" dimension #457
        metadata=None if self.metadata is None else self.metadata.reduce_spatial(),
    )
+ + +
@openeo_process
def aggregate_spatial_window(
    self,
    reducer: Union[str, typing.Callable, PGNode],
    size: List[int],
    boundary: str = "pad",
    align: str = "upper-left",
    context: Optional[dict] = None,
    # TODO arguments: target dimension, context
) -> DataCube:
    """
    Aggregates statistics over the horizontal spatial dimensions (axes x and y) of the data cube.

    The pixel grid for the axes x and y is divided into non-overlapping windows with the size
    specified in the parameter size. If the number of values for the axes x and y is not a multiple
    of the corresponding window size, the behavior specified in the parameters boundary and align
    is applied. For each of these windows, the reducer process computes the result.

    :param reducer: the "child callback":
        the name of a single openEO process,
        or a callback function as discussed in :ref:`callbackfunctions`,
        or a :py:class:`UDF <openeo.rest._datacube.UDF>` instance.
    :param size: Window size in pixels along the horizontal spatial dimensions.
        The first value corresponds to the x axis, the second value corresponds to the y axis.
    :param boundary: Behavior to apply if the number of values for the axes x and y is not a
        multiple of the corresponding value in the size parameter.
        Options are:

        - ``pad`` (default): pad the data cube with the no-data value null to fit the required window size.
        - ``trim``: trim the data cube to fit the required window size.

        Use the parameter ``align`` to align the data to the desired corner.

    :param align: If the data requires padding or trimming (see parameter ``boundary``), specifies
        to which corner of the spatial extent the data is aligned to. For example, if the data is
        aligned to the upper left, the process pads/trims at the lower-right.
    :param context: Additional data to be passed to the process.

    :return: A data cube with the newly computed values and the same dimensions.
    :raises ValueError: on an unsupported ``boundary`` or ``align`` value,
        or when ``size`` does not contain exactly two items
    """
    supported_boundaries = ["pad", "trim"]
    supported_alignments = ["lower-left", "upper-left", "lower-right", "upper-right"]

    # Validate the arguments client-side, before building anything.
    if boundary not in supported_boundaries:
        raise ValueError(f"Provided boundary type not supported. Please use one of {supported_boundaries} .")
    if align not in supported_alignments:
        raise ValueError(f"Provided align type not supported. Please use one of {supported_alignments} .")
    if len(size) != 2:
        raise ValueError("Provided size not supported. Please provide a list of 2 integer values.")

    window_reducer = build_child_callback(reducer, parent_parameters=["data"])
    return self.process(
        process_id="aggregate_spatial_window",
        arguments={
            "data": THIS,
            "boundary": boundary,
            "align": align,
            "size": size,
            "reducer": window_reducer,
            "context": context,
        },
    )
+ + +
@openeo_process
def apply_dimension(
    self,
    code: Optional[str] = None,
    runtime=None,
    # TODO: drop None default of process (when `code` and `runtime` args can be dropped)
    process: Union[str, typing.Callable, UDF, PGNode] = None,
    version: Optional[str] = None,
    # TODO: dimension has no default (per spec)?
    dimension: str = "t",
    target_dimension: Optional[str] = None,
    context: Optional[dict] = None,
) -> DataCube:
    """
    Applies a process to all pixel values along a dimension of a raster data cube. For example,
    if the temporal dimension is specified the process will work on a time series of pixel values.

    The process to apply is specified by either `code` and `runtime` in case of a UDF, or by providing a callback function
    in the `process` argument.

    The process reduce_dimension also applies a process to pixel values along a dimension, but drops
    the dimension afterwards. The process apply applies a process to each pixel value in the data cube.

    The target dimension is the source dimension if not specified otherwise in the target_dimension parameter.
    The pixel values in the target dimension get replaced by the computed pixel values. The name, type and
    reference system are preserved.

    The dimension labels are preserved when the target dimension is the source dimension and the number of
    pixel values in the source dimension is equal to the number of values computed by the process. Otherwise,
    the dimension labels will be incrementing integers starting from zero, which can be changed using
    rename_labels afterwards. The number of labels will equal to the number of values computed by the process.

    :param code: [**deprecated**] UDF code or process identifier (optional)
    :param runtime: [**deprecated**] UDF runtime to use (optional)
    :param process: the "child callback":
        the name of a single process,
        or a callback function as discussed in :ref:`callbackfunctions`,
        or a :py:class:`UDF <openeo.rest._datacube.UDF>` instance.

        The callback should correspond to a process that
        receives an array of numerical values
        and returns an array of numerical values.
        For example:

        -   ``"sort"`` (string)
        -   :py:func:`sort <openeo.processes.sort>` (:ref:`predefined openEO process function <openeo_processes_functions>`)
        -   ``lambda data: data.concat([42, -3])`` (function or lambda)

    :param version: [**deprecated**] Version of the UDF runtime to use
    :param dimension: The name of the source dimension to apply the process on. Fails with a DimensionNotAvailable error if the specified dimension does not exist.
    :param target_dimension: The name of the target dimension or null (the default) to use the source dimension
        specified in the parameter dimension. By specifying a target dimension, the source dimension is removed.
        The target dimension with the specified name and the type other (see add_dimension) is created, if it doesn't exist yet.
    :param context: Additional data to be passed to the process.

    :return: A datacube with the UDF applied to the given dimension.
    :raises: DimensionNotAvailable
    :raises ValueError: when ``process`` is combined with the deprecated
        ``code``/``runtime``/``version`` arguments.

    .. versionchanged:: 0.13.0
        arguments ``code``, ``runtime`` and ``version`` are deprecated if favor of the standard approach
        of using an :py:class:`UDF <openeo.rest._datacube.UDF>` object in the ``process`` argument.
        See :ref:`old_udf_api` for more background about the changes.

    """
    # TODO #137 #181 #312 remove support for code/runtime/version
    if runtime or (isinstance(code, str) and "\n" in code) or version:
        # Legacy UDF usage: multi-line `code` and/or explicit runtime/version.
        if process:
            raise ValueError(
                "Cannot specify `process` argument together with deprecated `code`/`runtime`/`version` arguments."
            )
        else:
            warnings.warn(
                "Specifying UDF code through `code`, `runtime` and `version` arguments is deprecated. "
                "Instead create an `openeo.UDF` object and pass that to the `process` argument.",
                category=UserDeprecationWarning,
                stacklevel=2,
            )
            process = UDF(code=code, runtime=runtime, version=version, context=context)
    else:
        # A single-line `code` value is interpreted as process identifier.
        process = process or code
    process = build_child_callback(
        process=process, parent_parameters=["data", "context"], connection=self.connection
    )
    arguments = {
        "data": THIS,
        "process": process,
        "dimension": self._assert_valid_dimension_name(dimension),
    }

    metadata = self.metadata
    if target_dimension is not None:
        arguments["target_dimension"] = target_dimension
        if self.metadata:
            # With an explicit target dimension, the source dimension is removed
            # and the target dimension is created if it did not exist yet.
            # Build on the *reduced* metadata so that the removal is not lost.
            target_is_new = target_dimension not in self.metadata.dimension_names()
            metadata = self.metadata.reduce_dimension(dimension_name=dimension)
            if target_is_new:
                metadata = metadata.add_dimension(target_dimension, label="unknown")
        else:
            # No metadata to work with: nothing to derive (and nothing to crash on).
            metadata = None
    if context is not None:
        arguments["context"] = context
    return self.process(process_id="apply_dimension", arguments=arguments, metadata=metadata)
+ + +
@openeo_process
def reduce_dimension(
    self,
    dimension: str,
    reducer: Union[str, typing.Callable, UDF, PGNode],
    context: Optional[dict] = None,
    process_id="reduce_dimension",
    band_math_mode: bool = False,
) -> DataCube:
    """
    Reduce the data cube along the given dimension with the given reducer callback.

    :param dimension: the label of the dimension to reduce
    :param reducer: the "child callback":
        the name of a single openEO process,
        or a callback function as discussed in :ref:`callbackfunctions`,
        or a :py:class:`UDF <openeo.rest._datacube.UDF>` instance.

        The callback should correspond to a process that
        receives an array of numerical values
        and returns a single numerical value.
        For example:

        -   ``"mean"`` (string)
        -   :py:func:`max <openeo.processes.max>` (:ref:`predefined openEO process function <openeo_processes_functions>`)
        -   ``lambda data: data.min()`` (function or lambda)

    :param context: Additional data to be passed to the process.
    :param process_id: id of the reduce process to put in the process graph
        (``"reduce_dimension"`` by default).
    :param band_math_mode: flag passed along to the internal ``ReduceNode``.
    """
    # TODO: check if dimension is valid according to metadata? #116
    # TODO: #125 use/test case for `reduce_dimension_binary`?
    callback = build_child_callback(
        process=reducer, parent_parameters=["data", "context"], connection=self.connection
    )
    node = ReduceNode(
        process_id=process_id,
        data=self,
        reducer=callback,
        dimension=self._assert_valid_dimension_name(dimension),
        context=context,
        # TODO #123 is it (still) necessary to make "band" math a special case?
        band_math_mode=band_math_mode,
    )
    # The resulting cube loses the reduced dimension (when metadata is available).
    if self.metadata:
        reduced_metadata = self.metadata.reduce_dimension(dimension_name=dimension)
    else:
        reduced_metadata = None
    return self.process_with_node(node, metadata=reduced_metadata)
+ + +
@openeo_process
def reduce_spatial(
    self,
    reducer: Union[str, typing.Callable, UDF, PGNode],
    context: Optional[dict] = None,
) -> "DataCube":
    """
    Add a reduce process with given reducer callback along the spatial dimensions

    :param reducer: the "child callback":
        the name of a single openEO process,
        or a callback function as discussed in :ref:`callbackfunctions`,
        or a :py:class:`UDF <openeo.rest._datacube.UDF>` instance.

        The callback should correspond to a process that
        receives an array of numerical values
        and returns a single numerical value.
        For example:

        -   ``"mean"`` (string)
        -   :py:func:`max <openeo.processes.max>` (:ref:`predefined openEO process function <openeo_processes_functions>`)
        -   ``lambda data: data.min()`` (function or lambda)

    :param context: Additional data to be passed to the process.
    :return: A data cube for the ``reduce_spatial`` result; its metadata
        (when available on this cube) is spatially reduced as well.
    """
    reducer = build_child_callback(
        process=reducer, parent_parameters=["data", "context"], connection=self.connection
    )
    return self.process(
        process_id="reduce_spatial",
        data=self,
        reducer=reducer,
        context=context,
        # Cubes without metadata are supported by the sibling reduce methods; don't crash here either.
        metadata=self.metadata.reduce_spatial() if self.metadata else None,
    )
+ + +
@deprecated("Use :py:meth:`apply_polygon`.", version="0.26.0")
def chunk_polygon(
    self,
    chunks: Union[shapely.geometry.base.BaseGeometry, dict, str, pathlib.Path, Parameter, VectorCube],
    process: Union[str, PGNode, typing.Callable, UDF],
    mask_value: float = None,
    context: Optional[dict] = None,
) -> DataCube:
    """"""
    # Docstring intentionally left empty, like before (deprecated in favor of `apply_polygon`).
    callback = build_child_callback(process, parent_parameters=["data"], connection=self.connection)
    # Normalize the given geometries; only polygon-like GeoJSON types are accepted.
    chunk_geometries = self._get_geometry_argument(
        chunks,
        valid_geojson_types=["Polygon", "MultiPolygon", "GeometryCollection", "Feature", "FeatureCollection"],
    )
    if mask_value is not None:
        mask_value = float(mask_value)
    # Optional arguments are only included when actually set.
    optional_arguments = dict_no_none(mask_value=mask_value, context=context)
    return self.process(
        process_id="chunk_polygon",
        data=THIS,
        chunks=chunk_geometries,
        process=callback,
        arguments=optional_arguments,
    )
+ + +
@openeo_process
def apply_polygon(
    self,
    geometries: Union[shapely.geometry.base.BaseGeometry, dict, str, pathlib.Path, Parameter, VectorCube] = None,
    process: Union[str, PGNode, typing.Callable, UDF] = None,
    mask_value: Optional[float] = None,
    context: Optional[dict] = None,
    **kwargs,
) -> DataCube:
    """
    Apply a process to segments of the data cube that are defined by the given polygons.
    For each polygon provided, all pixels for which the point at the pixel center intersects
    with the polygon (as defined in the Simple Features standard by the OGC) are collected into sub data cubes.
    If a pixel is part of multiple of the provided polygons (e.g., when the polygons overlap),
    the GeometriesOverlap exception is thrown.
    Each sub data cube is passed individually to the given process.

    :param geometries: Can be provided in different ways:

        - a shapely geometry
        - a GeoJSON-style dictionary,
        - a public URL to the geometries in a vector format that is supported by the backend
          (also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
          e.g. GeoJSON, GeoParquet, etc.
          A ``load_url`` process will automatically be added to the process graph.
        - a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
          which will be loaded automatically to get the geometries as GeoJSON construct.
        - a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
        - a :py:class:`~openeo.api.process.Parameter` instance.

    :param process: "child callback" function, see :ref:`callbackfunctions`
    :param mask_value: The value used for pixels outside the polygon.
    :param context: Additional data to be passed to the process.

    :raises ValueError: on unexpected keyword arguments,
        or when no geometries or no process are provided.

    .. warning:: experimental process: not generally supported, API subject to change.

    .. versionchanged:: 0.32.0
        Argument ``polygons`` was renamed to ``geometries``.
        While deprecated, the old name ``polygons`` is still supported
        as keyword argument for backwards compatibility.

    .. versionchanged:: 0.36.0
        Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.

    .. versionchanged:: 0.36.0
        Support for passing a backend-side path as ``geometries`` argument was removed
        (also see :ref:`legacy_read_vector`).
        Instead, it's possible to provide a client-side path to a GeoJSON file
        (which will be loaded client-side to get the geometries as GeoJSON construct).
    """
    # TODO drop support for legacy `polygons` argument:
    #      remove `kwargs, remove default `None` value for `geometries` and `process`
    #      and the related backwards compatibility code
    geometries_parameter = "geometries"
    if geometries is None and "polygons" in kwargs:
        # Legacy usage: keep using the old parameter name in the process graph too.
        geometries = kwargs.pop("polygons")
        geometries_parameter = "polygons"
        warnings.warn(
            "In `apply_polygon` use argument `geometries` instead of deprecated 'polygons'.",
            category=UserDeprecationWarning,
            stacklevel=2,
        )
    if kwargs:
        raise ValueError(f"Unexpected keyword arguments: {kwargs!r}")
    if not geometries:
        raise ValueError("No geometries provided.")

    # Note: the `process` argument was given a default value `None` (with the `polygons`/`geometries` argument rename)
    #       to keep support for legacy `cube.apply_polygon(polygons=..., process=...)` usage:
    #       `geometries` had to be given a default value, and so did `process` as it comes after it.
    # TODO: remove default value for `process` when dropping support for legacy `polygons` argument
    # Explicit check instead of `assert`, which is stripped when running Python with `-O`.
    if process is None:
        raise ValueError("No process provided.")

    process = build_child_callback(process, parent_parameters=["data"], connection=self.connection)
    valid_geojson_types = ["Polygon", "MultiPolygon", "Feature", "FeatureCollection"]
    geometries = self._get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types)
    mask_value = float(mask_value) if mask_value is not None else None
    return self.process(
        process_id="apply_polygon",
        data=THIS,
        **{geometries_parameter: geometries},
        process=process,
        arguments=dict_no_none(
            mask_value=mask_value,
            context=context,
        ),
    )
+ + +
def reduce_bands(self, reducer: Union[str, PGNode, typing.Callable, UDF]) -> DataCube:
    """
    Reduce along the band dimension: shortcut for :py:meth:`reduce_dimension`.

    The band dimension name is taken from the cube metadata when available,
    with ``"bands"`` as fallback.

    :param reducer: "child callback" function, see :ref:`callbackfunctions`
    """
    if self.metadata:
        band_dimension = self.metadata.band_dimension.name
    else:
        band_dimension = "bands"
    return self.reduce_dimension(dimension=band_dimension, reducer=reducer, band_math_mode=True)
+ + +
def reduce_temporal(self, reducer: Union[str, PGNode, typing.Callable, UDF]) -> DataCube:
    """
    Reduce along the temporal dimension: shortcut for :py:meth:`reduce_dimension`.

    The temporal dimension name is taken from the cube metadata when available,
    with ``"t"`` as fallback.

    :param reducer: "child callback" function, see :ref:`callbackfunctions`
    """
    if self.metadata:
        temporal_dimension = self.metadata.temporal_dimension.name
    else:
        temporal_dimension = "t"
    return self.reduce_dimension(dimension=temporal_dimension, reducer=reducer)
+ + +
@deprecated(
    "Use :py:meth:`reduce_bands` with :py:class:`UDF <openeo.rest._datacube.UDF>` as reducer.",
    version="0.13.0",
)
def reduce_bands_udf(self, code: str, runtime: Optional[str] = None, version: Optional[str] = None) -> DataCube:
    """
    Reduce along the band/spectral dimension (through :py:meth:`reduce_bands`) with a UDF as reducer.

    :param code: The UDF code
    :param runtime: The UDF runtime
    :param version: The UDF runtime version
    """
    # TODO #181 #312 drop this deprecated pattern
    udf = UDF(code=code, runtime=runtime, version=version)
    return self.reduce_bands(reducer=udf)
+ + +
@openeo_process
def add_dimension(self, name: str, label: str, type: Optional[str] = None):
    """
    Add a new named dimension (with a single label) to the data cube.

    Afterwards, the dimension can be referenced with the specified name.
    If a dimension with the specified name already exists, the process fails with a DimensionExists error.

    The data cube is not modified in place: a new data cube with the additional dimension is returned.

    :param name: The name of the dimension to add
    :param label: The dimension label.
    :param type: Dimension type, allowed values: 'spatial', 'temporal', 'bands', 'other', default value is 'other'
    :return: The data cube with a newly added dimension. The new dimension has exactly one dimension label. All other dimensions remain unchanged.
    """
    # `type` is only included in the process arguments when it is actually set.
    arguments = dict_no_none({"data": self, "name": name, "label": label, "type": type})
    if self.metadata:
        metadata = self.metadata.add_dimension(name=name, label=label, type=type)
    else:
        metadata = None
    return self.process(process_id="add_dimension", arguments=arguments, metadata=metadata)
+ + +
@openeo_process
def drop_dimension(self, name: str):
    """
    Drop a dimension from the data cube.

    Only dimensions with a single dimension label left can be dropped,
    otherwise the process fails with a DimensionLabelCountMismatch exception.
    Dimension values can be reduced to a single value with a filter
    such as filter_bands or the reduce_dimension process.
    If a dimension with the specified name does not exist,
    the process fails with a DimensionNotAvailable exception.

    :param name: The name of the dimension to drop
    :return: The data cube with the given dimension dropped.
    """
    arguments = {"data": self, "name": name}
    if self.metadata:
        metadata = self.metadata.drop_dimension(name=name)
    else:
        metadata = None
    return self.process(process_id="drop_dimension", arguments=arguments, metadata=metadata)
+ + +
@deprecated(
    "Use :py:meth:`reduce_temporal` with :py:class:`UDF <openeo.rest._datacube.UDF>` as reducer",
    version="0.13.0",
)
def reduce_temporal_udf(self, code: str, runtime="Python", version="latest"):
    """
    Reduce along the temporal dimension (through :py:meth:`reduce_temporal`) with a UDF as reducer.

    :param code: The UDF code, compatible with the given runtime and version
    :param runtime: The UDF runtime
    :param version: The UDF runtime version
    """
    # TODO #181 #312 drop this deprecated pattern
    udf = UDF(code=code, runtime=runtime, version=version)
    return self.reduce_temporal(reducer=udf)
# Legacy alias (since 0.1.1) for `reduce_temporal_udf`.
reduce_tiles_over_time = legacy_alias(reduce_temporal_udf, "reduce_tiles_over_time", since="0.1.1")
@openeo_process
def apply_neighborhood(
    self,
    process: Union[str, PGNode, typing.Callable, UDF],
    size: List[Dict],
    overlap: List[dict] = None,
    context: Optional[dict] = None,
) -> DataCube:
    """
    Applies a focal process to a data cube.

    A focal process works on a 'neighbourhood' of pixels, which can extend into multiple dimensions
    as specified by the `size` argument. The `size` is not only (part of) the size of the input window,
    but also the size of the output for a given position of the sliding window.
    The sliding window moves with multiples of `size`.

    An `overlap` can be specified so that neighbourhoods have overlapping boundaries,
    which allows for continuity of the output.
    The values included in the data cube as overlap can't be modified by the given `process`.

    The neighbourhood size should be kept small enough to avoid running beyond computational resources,
    but a too small size results in a larger number of process invocations, which may slow down processing.
    Window sizes for spatial dimensions typically are in the range of 64 to 512 pixels,
    while overlaps of 8 to 32 pixels are common.

    The process must not add new dimensions, or remove entire dimensions,
    but the result can have different dimension labels.

    For the special case of 2D convolution, it is recommended to use ``apply_kernel()``.

    :param process: a callback function that creates a process graph, see :ref:`callbackfunctions`
    :param size: Neighbourhood size, as a list of dictionaries
        (see the openEO ``apply_neighborhood`` process specification for the expected structure).
    :param overlap: Optional overlap of neighbourhoods, as a list of dictionaries
        (see the openEO ``apply_neighborhood`` process specification for the expected structure).
    :param context: Additional data to be passed to the process.

    :return: A data cube for the ``apply_neighborhood`` result.
    """
    callback = build_child_callback(process=process, parent_parameters=["data"], connection=self.connection)
    # `overlap` and `context` are left out of the process arguments when not set.
    arguments = dict_no_none(
        {
            "data": THIS,
            "process": callback,
            "size": size,
            "overlap": overlap,
            "context": context,
        }
    )
    return self.process(process_id="apply_neighborhood", arguments=arguments)
+ + +
@openeo_process
def apply(
    self,
    process: Union[str, typing.Callable, UDF, PGNode],
    context: Optional[dict] = None,
) -> DataCube:
    """
    Applies a unary process (a local operation) to each value of the specified or all dimensions in the data cube.

    :param process: the "child callback":
        the name of a single process,
        or a callback function as discussed in :ref:`callbackfunctions`,
        or a :py:class:`UDF <openeo.rest._datacube.UDF>` instance.

        The callback should correspond to a process that
        receives a single numerical value
        and returns a single numerical value.
        For example:

        -   ``"absolute"`` (string)
        -   :py:func:`absolute <openeo.processes.absolute>` (:ref:`predefined openEO process function <openeo_processes_functions>`)
        -   ``lambda x: x * 2 + 3`` (function or lambda)

    :param context: Additional data to be passed to the process.

    :return: A data cube with the newly computed values. The resolution, cardinality and the number of dimensions are the same as for the original data cube.
    """
    # The child process works on a single value, exposed as parameter "x".
    callback = build_child_callback(process, parent_parameters=["x"], connection=self.connection)
    # `context` is left out of the process arguments when not set.
    arguments = dict_no_none(data=THIS, process=callback, context=context)
    return self.process(process_id="apply", arguments=arguments)
# Legacy alias (since 0.13.0) for `reduce_temporal`.
reduce_temporal_simple = legacy_alias(reduce_temporal, name="reduce_temporal_simple", since="0.13.0")
@openeo_process(process_id="min", mode="reduce_dimension")
def min_time(self) -> DataCube:
    """
    Take the minimum along the temporal dimension (for all bands of the input dataset).

    :return: a DataCube instance
    """
    # Delegate to the generic temporal reducer with the predefined "min" process.
    return self.reduce_temporal(reducer="min")
+ + +
@openeo_process(process_id="max", mode="reduce_dimension")
def max_time(self) -> DataCube:
    """
    Take the maximum along the temporal dimension (for all bands of the input dataset).

    :return: a DataCube instance
    """
    # Delegate to the generic temporal reducer with the predefined "max" process.
    return self.reduce_temporal(reducer="max")
+ + +
@openeo_process(process_id="mean", mode="reduce_dimension")
def mean_time(self) -> DataCube:
    """
    Take the mean along the temporal dimension (for all bands of the input dataset).

    :return: a DataCube instance
    """
    # Delegate to the generic temporal reducer with the predefined "mean" process.
    return self.reduce_temporal(reducer="mean")
+ + +
@openeo_process(process_id="median", mode="reduce_dimension")
def median_time(self) -> DataCube:
    """
    Take the median along the temporal dimension (for all bands of the input dataset).

    :return: a DataCube instance
    """
    # Delegate to the generic temporal reducer with the predefined "median" process.
    return self.reduce_temporal(reducer="median")
+ + +
@openeo_process(process_id="count", mode="reduce_dimension")
def count_time(self) -> DataCube:
    """
    Count the number of images with a valid mask in a time series, for all bands of the input dataset.

    :return: a DataCube instance
    """
    # Delegate to the generic temporal reducer with the predefined "count" process.
    return self.reduce_temporal(reducer="count")
+ + +
@openeo_process
def aggregate_temporal(
    self,
    intervals: List[list],
    reducer: Union[str, typing.Callable, PGNode],
    labels: Optional[List[str]] = None,
    dimension: Optional[str] = None,
    context: Optional[dict] = None,
) -> DataCube:
    """
    Computes a temporal aggregation based on an array of date and/or time intervals.

    Calendar hierarchies such as year, month, week etc. must be transformed into specific intervals by the clients.
    For each interval, all data along the dimension will be passed through the reducer.
    The computed values will be projected to the labels,
    so the number of labels and the number of intervals need to be equal.

    If the dimension is not set, the data cube is expected to only have one temporal dimension.

    :param intervals: Temporal left-closed intervals so that the start time is contained, but not the end time.
    :param reducer: the "child callback":
        the name of a single openEO process,
        or a callback function as discussed in :ref:`callbackfunctions`,
        or a :py:class:`UDF <openeo.rest._datacube.UDF>` instance.

        The callback should correspond to a process that
        receives an array of numerical values
        and returns a single numerical value.
        For example:

        -   ``"mean"`` (string)
        -   :py:func:`max <openeo.processes.max>` (:ref:`predefined openEO process function <openeo_processes_functions>`)
        -   ``lambda data: data.min()`` (function or lambda)

    :param labels: Labels for the intervals. The number of labels and the number of groups need to be equal.
    :param dimension: The temporal dimension for aggregation. All data along the dimension will be passed through the specified reducer. If the dimension is not set, the data cube is expected to only have one temporal dimension.
    :param context: Additional data to be passed to the reducer. Not set by default.

    :return: A :py:class:`DataCube` containing a result for each time window
    """
    # Arguments that are not set (`None`) are left out of the process graph.
    arguments = dict_no_none(
        {
            "data": THIS,
            "intervals": intervals,
            "labels": labels,
            "dimension": dimension,
            "reducer": build_child_callback(reducer, parent_parameters=["data"]),
            "context": context,
        }
    )
    return self.process(process_id="aggregate_temporal", arguments=arguments)
+ + +
@openeo_process
def aggregate_temporal_period(
    self,
    period: str,
    reducer: Union[str, PGNode, typing.Callable],
    dimension: Optional[str] = None,
    context: Optional[Dict] = None,
) -> DataCube:
    """
    Computes a temporal aggregation based on calendar hierarchies such as years, months or seasons.
    For other calendar hierarchies aggregate_temporal can be used.

    For each interval, all data along the dimension will be passed through the reducer.

    If the dimension is not set or is set to null, the data cube is expected to only have one temporal dimension.

    The period argument specifies the time intervals to aggregate. The following pre-defined values are available:

    - hour: Hour of the day
    - day: Day of the year
    - week: Week of the year
    - dekad: Ten day periods, counted per year with three periods per month (day 1 - 10, 11 - 20 and 21 - end of month). The third dekad of the month can range from 8 to 11 days. For example, the fourth dekad is Feb, 1 - Feb, 10 each year.
    - month: Month of the year
    - season: Three month periods of the calendar seasons (December - February, March - May, June - August, September - November).
    - tropical-season: Six month periods of the tropical seasons (November - April, May - October).
    - year: Proleptic years
    - decade: Ten year periods (0-to-9 decade), from a year ending in a 0 to the next year ending in a 9.
    - decade-ad: Ten year periods (1-to-0 decade) better aligned with the Anno Domini (AD) calendar era, from a year ending in a 1 to the next year ending in a 0.


    :param period: The period of the time intervals to aggregate.
    :param reducer: A reducer to be applied on all values along the specified dimension. The reducer must be a callable process (or a set processes) that accepts an array and computes a single return value of the same type as the input values, for example median.
    :param dimension: The temporal dimension for aggregation. All data along the dimension will be passed through the specified reducer. If the dimension is not set, the data cube is expected to only have one temporal dimension.
    :param context: Additional data to be passed to the reducer.

    :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference system and resolution) remain unchanged.
    """
    # Arguments that are not set (`None`) are left out of the process graph.
    arguments = dict_no_none(
        {
            "data": THIS,
            "period": period,
            "dimension": dimension,
            "reducer": build_child_callback(reducer, parent_parameters=["data"]),
            "context": context,
        }
    )
    return self.process(process_id="aggregate_temporal_period", arguments=arguments)
+ + +
@openeo_process
def ndvi(self, nir: str = None, red: str = None, target_band: str = None) -> DataCube:
    """
    Normalized Difference Vegetation Index (NDVI)

    :param nir: (optional) name of NIR band
    :param red: (optional) name of red band
    :param target_band: (optional) name of the newly created band

    :return: a DataCube instance
    """
    metadata = None
    if self.metadata is not None:
        if target_band is None:
            # Without target band, the band dimension is dropped from the metadata.
            metadata = self.metadata.reduce_dimension(self.metadata.band_dimension.name)
        else:
            # TODO: first drop "bands" dim and re-add it with single "ndvi" band
            metadata = self.metadata.append_band(Band(name=target_band, common_name="ndvi"))
    # Band names that are not set are left out of the process arguments.
    arguments = dict_no_none({"data": THIS, "nir": nir, "red": red, "target_band": target_band})
    return self.process(process_id="ndvi", arguments=arguments, metadata=metadata)
+ + +
@openeo_process
def rename_dimension(self, source: str, target: str):
    """
    Rename a dimension of the data cube, while preserving all other properties.

    :param source: The current name of the dimension. Fails with a DimensionNotAvailable error if the specified dimension does not exist.
    :param target: A new Name for the dimension. Fails with a DimensionExists error if a dimension with the specified name exists.

    :return: A new datacube with the dimension renamed.
    :raises ValueError: when (with metadata normalization enabled)
        the target name is already in use as dimension name.
    """
    # Client-side conflict detection, only when metadata normalization is active.
    if self._do_metadata_normalization():
        if target in self.metadata.dimension_names():
            raise ValueError('Target dimension name conflicts with existing dimension: %s.' % target)
    arguments = dict_no_none(
        {
            "data": THIS,
            "source": self._assert_valid_dimension_name(source),
            "target": target,
        }
    )
    if self.metadata:
        metadata = self.metadata.rename_dimension(source, target)
    else:
        metadata = None
    return self.process(process_id="rename_dimension", arguments=arguments, metadata=metadata)
+ + +
@openeo_process
def rename_labels(self, dimension: str, target: list, source: list = None) -> DataCube:
    """
    Rename the labels of the given dimension of the data cube, from ``source`` to ``target``.

    :param dimension: Dimension name
    :param target: The new names for the labels.
    :param source: The names of the labels as they are currently in the data cube.

    :return: An DataCube instance
    """
    # `source` is left out of the process arguments when not set.
    arguments = dict_no_none(
        {
            "data": THIS,
            "dimension": self._assert_valid_dimension_name(dimension),
            "target": target,
            "source": source,
        }
    )
    if self.metadata:
        metadata = self.metadata.rename_labels(dimension, target, source)
    else:
        metadata = None
    return self.process(process_id="rename_labels", arguments=arguments, metadata=metadata)
+ + +
@openeo_process(mode="apply")
def linear_scale_range(self, input_min, input_max, output_min, output_max) -> DataCube:
    """
    Performs a linear transformation between the input and output range.

    The given number in x is clipped to the bounds specified in inputMin and inputMax so that the underlying formula

        ((x - inputMin) / (inputMax - inputMin)) * (outputMax - outputMin) + outputMin

    never returns any value lower than outputMin or greater than outputMax.

    Potential use case include scaling values to the 8-bit range (0 - 255) often used for numeric representation of
    values in one of the channels of the RGB colour model or calculating percentages (0 - 100).

    The no-data value null is passed through and therefore gets propagated.

    :param input_min: Minimum input value
    :param input_max: Maximum input value
    :param output_min: Minimum value of the desired output range.
    :param output_max: Maximum value of the desired output range.
    :return: a DataCube instance
    """

    def _scale(x):
        # Child callback for `apply`: rescale a single value `x`.
        return x.linear_scale_range(input_min, input_max, output_min, output_max)

    return self.apply(_scale)
+ + +
@openeo_process
def mask(self, mask: DataCube = None, replacement=None) -> DataCube:
    """
    Applies a mask to a raster data cube. To apply a vector mask use `mask_polygon`.

    A mask is a raster data cube for which corresponding pixels among `data` and `mask`
    are compared: pixels in `data` are replaced where the pixels in `mask` are non-zero
    (for numbers) or true (for boolean values).
    The replaced pixels get the value specified for `replacement`,
    which defaults to null (no data).

    :param mask: the raster mask
    :param replacement: the value to replace the masked pixels with
    """
    # Arguments that are not set (`None`) are left out of the process graph.
    arguments = dict_no_none({"data": self, "mask": mask, "replacement": replacement})
    return self.process(process_id="mask", arguments=arguments)
+ + +
@openeo_process
def mask_polygon(
    self,
    mask: Union[shapely.geometry.base.BaseGeometry, dict, str, pathlib.Path, Parameter, VectorCube],
    srs: str = None,
    replacement=None,
    inside: bool = None,
) -> DataCube:
    """
    Applies a polygon mask to a raster data cube. To apply a raster mask use `mask`.

    All pixels for which the point at the pixel center does not intersect with any
    polygon (as defined in the Simple Features standard by the OGC) are replaced.
    This behaviour can be inverted by setting the parameter `inside` to true.

    The pixel values are replaced with the value specified for `replacement`,
    which defaults to `no data`.

    :param mask: The geometry to mask with. Can be provided in different ways:

        - a shapely geometry
        - a GeoJSON-style dictionary,
        - a public URL to the geometries in a vector format that is supported by the backend
          (also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
          e.g. GeoJSON, GeoParquet, etc.
          A ``load_url`` process will automatically be added to the process graph.
        - a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
          which will be loaded automatically to get the geometries as GeoJSON construct.
        - a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
        - a :py:class:`~openeo.api.process.Parameter` instance.

    :param srs: The spatial reference system of the provided polygon.
        By default longitude-latitude (EPSG:4326) is assumed.

        .. note:: this ``srs`` argument is a non-standard/experimental feature, only supported by specific back-ends.
            See https://github.com/Open-EO/openeo-processes/issues/235 for details.
    :param replacement: the value to replace the masked pixels with
    :param inside: set to true to invert the masking behaviour (see above).

    .. versionchanged:: 0.36.0
        Support passing a URL as ``mask`` argument, which will be loaded with the ``load_url`` process.

    .. versionchanged:: 0.36.0
        Support for passing a backend-side path as ``mask`` argument was removed
        (also see :ref:`legacy_read_vector`).
        Instead, it's possible to provide a client-side path to a GeoJSON file
        (which will be loaded client-side to get the geometries as GeoJSON construct).
    """
    # Normalize the given geometry; only polygon-like GeoJSON types are accepted.
    mask_geometry = self._get_geometry_argument(
        mask,
        valid_geojson_types=["Polygon", "MultiPolygon", "GeometryCollection", "Feature", "FeatureCollection"],
        crs=srs,
    )
    # `replacement` and `inside` are left out of the process arguments when not set.
    arguments = dict_no_none(
        {
            "data": THIS,
            "mask": mask_geometry,
            "replacement": replacement,
            "inside": inside,
        }
    )
    return self.process(process_id="mask_polygon", arguments=arguments)
+ + +
@openeo_process
def merge_cubes(
    self,
    other: DataCube,
    overlap_resolver: Union[str, PGNode, typing.Callable] = None,
    context: Optional[dict] = None,
) -> DataCube:
    """
    Merge this data cube with another, compatible data cube.

    A merge without overlap should be reversible with (a set of) filter operations on each of the two cubes.
    The join happens on overlapping dimensions, i.e. dimensions with the same name and type.
    An overlapping dimension has the same name, type, reference system and resolution in both cubes,
    but can have different labels. Only one of the dimensions can have different labels,
    for all other dimensions the labels must be equal.
    If data overlaps, ``overlap_resolver`` must be specified to resolve the overlap.

    Examples for merging two data cubes:

    #. Both cubes have dimensions x, y, t and bands, with equal labels in x, y and t,
       but band labels B1 and B2 in the first cube and B3 and B4 in the second.
       No overlap resolver is needed. The merged cube has dimensions x, y, t and bands,
       with four band labels: B1, B2, B3, B4.
    #. Both cubes have dimensions x, y, t and bands, with equal labels in x, y and t,
       but band labels B1 and B2 in the first cube and B2 and B3 in the second.
       An overlap resolver is required to resolve the overlap in band B2.
       The merged cube has dimensions x, y, t and bands, with three band labels: B1, B2, B3.
    #. Both cubes have dimensions x, y and t, with equal labels in all of them. There are two options:

       * Keep the overlapping values separately in the merged data cube:
         no overlap resolver is needed, but a new dimension has to be added to each cube with add_dimension.
         The new dimensions must be equal, except that their labels must differ by name.
         The merged cube has the dimensions and labels of the original cubes,
         plus the added dimension, which carries the two labels after the merge.
       * Combine the overlapping values into a single value:
         an overlap resolver is required to resolve the overlap for all pixels.
         The merged cube has the dimensions and labels of the original cubes,
         but all pixel values have been processed by the overlap resolver.
    #. Merging a cube with dimensions x, y, t with a cube with dimensions x, y joins on x and y,
       so the lower dimensional cube is merged with each time step of the higher dimensional cube.
       This can for instance be used to apply a digital elevation model to a spatiotemporal data cube.

    :param other: The data cube to merge with.
    :param overlap_resolver: A reduction operator that resolves the conflict if the data overlaps.
        The reducer must return a value of the same data type as the input values.
        It may be a single process such as multiply or consist of multiple sub-processes.
        null (the default) can be specified if no overlap resolver is required.
    :param context: Additional data to be passed to the process.

    :return: The merged data cube.
    """
    arguments = {"cube1": self, "cube2": other}
    if overlap_resolver:
        arguments["overlap_resolver"] = build_child_callback(overlap_resolver, parent_parameters=["x", "y"])

    # Client-side metadata is only merged when both sides are known to have a band dimension.
    both_have_bands = (
        self.metadata
        and self.metadata.has_band_dimension()
        and isinstance(other, DataCube)
        and other.metadata
        and other.metadata.has_band_dimension()
    )
    merged_metadata = None
    if both_have_bands:
        # Minimal client side metadata merging: append the bands that are not present yet.
        merged_metadata = self.metadata
        for band in other.metadata.band_dimension.bands:
            if band in merged_metadata.bands:
                # Overlapping bands without overlap resolver will give an error in the backend
                continue
            merged_metadata = merged_metadata.append_band(band)

    if context:
        arguments["context"] = context
    return self.process(process_id="merge_cubes", arguments=arguments, metadata=merged_metadata)
# Legacy alias: keeps `merge_cubes` reachable under its former name ``merge``
# (registered through `legacy_alias` with ``since="0.4.6"``).
merge = legacy_alias(merge_cubes, name="merge", since="0.4.6")
@openeo_process
def apply_kernel(
    self, kernel: Union[np.ndarray, List[List[float]]], factor=1.0, border=0,
    replace_invalid=0
) -> DataCube:
    """
    Apply a focal operation, based on a weighted kernel, to each value of the specified dimensions in the data cube.

    The ``border`` parameter determines how the data is extended when the kernel overlaps with the borders.
    The following options are available:

    * numeric value - fill with a user-defined constant number n: nnnnnn|abcdefgh|nnnnnn (default, with n = 0)
    * replicate - repeat the value from the pixel at the border: aaaaaa|abcdefgh|hhhhhh
    * reflect - mirror/reflect from the border: fedcba|abcdefgh|hgfedc
    * reflect_pixel - mirror/reflect from the center of the pixel at the border: gfedcb|abcdefgh|gfedcb
    * wrap - repeat/wrap the image: cdefgh|abcdefgh|abcdef

    :param kernel: The kernel to be applied on the data cube.
        The kernel has to have as many dimensions as the data cube has dimensions.
    :param factor: A factor that is multiplied to each value computed by the focal operation.
        This is basically a shortcut for explicitly multiplying each value by a factor afterwards,
        which is often required for some kernel-based algorithms such as the Gaussian blur.
    :param border: Determines how the data is extended when the kernel overlaps with the borders.
        Defaults to fill the border with zeroes.
    :param replace_invalid: The value to replace non-numerical or infinite numerical values with.
        By default, those values are replaced with zeroes.
    :return: A data cube with the newly computed values.
        The resolution, cardinality and the number of dimensions are the same as for the original data cube.
    """
    # A numpy array is converted to plain nested lists before it goes in the process arguments.
    if isinstance(kernel, np.ndarray):
        kernel_argument = kernel.tolist()
    else:
        kernel_argument = kernel

    arguments = {
        'data': THIS,
        'kernel': kernel_argument,
        'factor': factor,
        'border': border,
        'replace_invalid': replace_invalid,
    }
    return self.process('apply_kernel', arguments)
+ + +
@openeo_process
def resolution_merge(
    self, high_resolution_bands: List[str], low_resolution_bands: List[str], method: str = None
) -> DataCube:
    """
    Resolution merging algorithms try to improve the spatial resolution of lower resolution bands
    (e.g. Sentinel-2 20M) based on higher resolution bands (e.g. Sentinel-2 10M).

    External references:

    `Pansharpening explained <https://bok.eo4geo.eu/IP2-1-3>`_

    `Example publication: 'Improving the Spatial Resolution of Land Surface Phenology by Fusing Medium- and
    Coarse-Resolution Inputs' <https://doi.org/10.1109/TGRS.2016.2537929>`_

    .. warning:: experimental process: not generally supported, API subject to change.

    :param high_resolution_bands: A list of band names to use as 'high-resolution' band.
        Either the unique band name (metadata field `name` in bands)
        or one of the common band names (metadata field `common_name` in bands).
        If unique band name and common name conflict, the unique band name has higher priority.
        The order of the specified array defines the order of the bands in the data cube.
        If multiple bands match a common name, all matched bands are included in the original order.
        These bands will remain unmodified.
    :param low_resolution_bands: A list of band names for which the spatial resolution should be increased.
        Either the unique band name (metadata field `name` in bands)
        or one of the common band names (metadata field `common_name` in bands).
        If unique band name and common name conflict, the unique band name has higher priority.
        The order of the specified array defines the order of the bands in the data cube.
        If multiple bands match a common name, all matched bands are included in the original order.
        These bands will be modified by the process.
    :param method: The method to use. The supported algorithms can vary between back-ends.
        Set to `null` (the default) to allow the back-end to choose,
        which will improve portability, but reduce reproducibility.
    :return: A datacube with the same bands and metadata as the input,
        but algorithmically increased spatial resolution for the selected bands.
    """
    # `method` is always part of the arguments, also when it is left at `None`.
    arguments = {
        'data': THIS,
        'high_resolution_bands': high_resolution_bands,
        'low_resolution_bands': low_resolution_bands,
        'method': method,
    }
    return self.process('resolution_merge', arguments)
+ + +
def raster_to_vector(self) -> VectorCube:
    """
    Convert this raster data cube into a :py:class:`~openeo.rest.vectorcube.VectorCube`.
    The bounding polygon of homogeneous areas of pixels is constructed.

    .. warning:: experimental process: not generally supported, API subject to change.

    :return: a :py:class:`~openeo.rest.vectorcube.VectorCube`
    """
    # Built on a dedicated process graph node, reusing the connection of this cube.
    node = PGNode(process_id="raster_to_vector", arguments={"data": self})
    vector_cube = VectorCube(node, connection=self._connection)
    return vector_cube
+ + + ####VIEW methods ####### + +
@deprecated(
    "Use :py:meth:`aggregate_spatial` with reducer ``'mean'``.", version="0.10.0"
)
def polygonal_mean_timeseries(
    self, polygon: Union[Polygon, MultiPolygon, str]
) -> VectorCube:
    """
    Extract a mean time series for the given (multi)polygon.
    Its points are expected to be in the EPSG:4326 coordinate reference system.

    Thin wrapper around :py:meth:`aggregate_spatial` with the ``"mean"`` reducer.

    :param polygon: The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file
    :return: the result of the ``aggregate_spatial`` call
    """
    reducer = "mean"
    return self.aggregate_spatial(geometries=polygon, reducer=reducer)
+ + +
@deprecated(
    "Use :py:meth:`aggregate_spatial` with reducer ``'histogram'``.",
    version="0.10.0",
)
def polygonal_histogram_timeseries(
    self, polygon: Union[Polygon, MultiPolygon, str]
) -> VectorCube:
    """
    Extract a histogram time series for the given (multi)polygon.
    Its points are expected to be in the EPSG:4326 coordinate reference system.

    Thin wrapper around :py:meth:`aggregate_spatial` with the ``"histogram"`` reducer.

    :param polygon: The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file
    :return: the result of the ``aggregate_spatial`` call
    """
    reducer = "histogram"
    return self.aggregate_spatial(geometries=polygon, reducer=reducer)
+ + +
@deprecated(
    "Use :py:meth:`aggregate_spatial` with reducer ``'median'``.", version="0.10.0"
)
def polygonal_median_timeseries(
    self, polygon: Union[Polygon, MultiPolygon, str]
) -> VectorCube:
    """
    Extract a median time series for the given (multi)polygon.
    Its points are expected to be in the EPSG:4326 coordinate reference system.

    Thin wrapper around :py:meth:`aggregate_spatial` with the ``"median"`` reducer.

    :param polygon: The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file
    :return: the result of the ``aggregate_spatial`` call
    """
    reducer = "median"
    return self.aggregate_spatial(geometries=polygon, reducer=reducer)
+ + +
@deprecated(
    "Use :py:meth:`aggregate_spatial` with reducer ``'sd'``.", version="0.10.0"
)
def polygonal_standarddeviation_timeseries(
    self, polygon: Union[Polygon, MultiPolygon, str]
) -> VectorCube:
    """
    Extract a time series of standard deviations for the given (multi)polygon.
    Its points are expected to be in the EPSG:4326 coordinate reference system.

    Thin wrapper around :py:meth:`aggregate_spatial` with the ``"sd"`` reducer.

    :param polygon: The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file
    :return: the result of the ``aggregate_spatial`` call
    """
    reducer = "sd"
    return self.aggregate_spatial(geometries=polygon, reducer=reducer)
+ + +
@openeo_process
def ard_surface_reflectance(
    self, atmospheric_correction_method: str, cloud_detection_method: str, elevation_model: str = None,
    atmospheric_correction_options: dict = None, cloud_detection_options: dict = None,
) -> DataCube:
    """
    Compute CARD4L compliant surface reflectance values from optical input.

    :param atmospheric_correction_method: The atmospheric correction method to use.
    :param cloud_detection_method: The cloud detection method to use.
    :param elevation_model: The digital elevation model to use,
        leave empty to allow the back-end to make a suitable choice.
    :param atmospheric_correction_options: Proprietary options for the atmospheric correction method.
    :param cloud_detection_options: Proprietary options for the cloud detection method.
    :return: Data cube containing bottom of atmosphere reflectances
        with atmospheric disturbances like clouds and cloud shadows removed.
        The data returned is CARD4L compliant and contains metadata.
    """
    # Unset (or empty) option dictionaries are sent as an empty dict.
    correction_options = atmospheric_correction_options or {}
    detection_options = cloud_detection_options or {}

    arguments = {
        'data': THIS,
        'atmospheric_correction_method': atmospheric_correction_method,
        'cloud_detection_method': cloud_detection_method,
        'elevation_model': elevation_model,
        'atmospheric_correction_options': correction_options,
        'cloud_detection_options': detection_options,
    }
    return self.process('ard_surface_reflectance', arguments)
+ + +
@openeo_process
def atmospheric_correction(self, method: str = None, elevation_model: str = None, options: dict = None) -> DataCube:
    """
    Apply an atmospheric correction that converts top of atmosphere reflectance values
    into bottom of atmosphere/top of canopy reflectance values.

    Note that multiple atmospheric methods exist, but may not be supported by all backends.
    The ``method`` parameter gives you the option of requiring a specific method,
    but this may result in an error if the backend does not support it.

    :param method: The atmospheric correction method to use.
        To get reproducible results, you have to set a specific method.
        Set to `null` to allow the back-end to choose, which will improve portability,
        but reduce reproducibility as you *may* get different results if you run the processes multiple times.
    :param elevation_model: The digital elevation model to use,
        leave empty to allow the back-end to make a suitable choice.
    :param options: Proprietary options for the atmospheric correction method.
    :return: datacube with bottom of atmosphere reflectances
    """
    # Unset (or empty) options are sent as an empty dict.
    method_options = options or {}
    arguments = {
        'data': THIS,
        'method': method,
        'elevation_model': elevation_model,
        'options': method_options,
    }
    return self.process('atmospheric_correction', arguments)
+ + +
@openeo_process
def save_result(
    self,
    format: str = _DEFAULT_RASTER_FORMAT,
    options: Optional[dict] = None,
) -> DataCube:
    """
    Add a ``save_result`` process to the process graph.

    When this cube has a connection, ``format`` is checked (case-insensitively)
    against the output formats listed by that connection.

    :param format: output file format
    :param options: file format options; sent as an empty dict when unset
    :return: the data cube resulting from the ``save_result`` process
    :raises ValueError: if ``format`` is not among the output formats listed by the connection
    """
    if self._connection:
        formats = set(self._connection.list_output_formats().keys())
        # TODO: map format to correct casing too?
        known_lowercase = {name.lower() for name in formats}
        if format.lower() not in known_lowercase:
            raise ValueError("Invalid format {f!r}. Should be one of {s}".format(f=format, s=formats))

    arguments = {
        "data": THIS,
        "format": format,
        # TODO: leave out options if unset?
        "options": options or {},
    }
    return self.process(process_id="save_result", arguments=arguments)
+ + +
def download(
    self,
    outputfile: Optional[Union[str, pathlib.Path]] = None,
    format: Optional[str] = None,
    options: Optional[dict] = None,
    *,
    validate: Optional[bool] = None,
    auto_add_save_result: bool = True,
    additional: Optional[dict] = None,
    job_options: Optional[dict] = None,
) -> Union[None, bytes]:
    """
    Execute synchronously and download the raster data cube, e.g. as GeoTIFF.

    If ``outputfile`` is provided, the result is stored on disk locally, otherwise a bytes object is returned.
    The bytes object can be passed on to a suitable decoder for decoding.

    :param outputfile: Optional, an output file if the result needs to be stored on disk.
    :param format: Optional, an output format supported by the backend.
    :param options: Optional, file format options
    :param validate: Optional toggle to enable/prevent validation of the process graphs before execution
        (overruling the connection's ``auto_validate`` setting).
    :param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet.
    :param additional: additional (top-level) properties to set in the request body
    :param job_options: dictionary of job options to pass to the backend
        (under top-level property "job_options")

    :return: None if the result is stored to disk, or a bytes object returned by the backend.

    .. versionchanged:: 0.32.0
        Added ``auto_add_save_result`` option

    .. versionadded:: 0.36.0
        Added arguments ``additional`` and ``job_options``.
    """
    # TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ...
    if auto_add_save_result:
        # The output file name only serves as a "weak" hint for the format.
        cube = _ensure_save_result(
            cube=self,
            format=format,
            options=options,
            weak_format=guess_format(outputfile) if outputfile else None,
            default_format=self._DEFAULT_RASTER_FORMAT,
            method="DataCube.download()",
        )
    else:
        cube = self

    return self._connection.download(
        cube.flat_graph(), outputfile, validate=validate, additional=additional, job_options=job_options
    )
+ + +
def validate(self) -> List[dict]:
    """
    Validate the process graph of this data cube, without executing it.

    :return: list of errors (dictionaries with "code" and "message" fields)
    """
    validate_process_graph = self._connection.validate_process_graph
    return validate_process_graph(self.flat_graph())
def tiled_viewing_service(self, type: str, **kwargs) -> Service:
    """
    Create a service of the given type from the process graph of this data cube.

    :param type: service type, passed on to the connection's ``create_service``
    :param kwargs: additional keyword arguments passed on to ``create_service``
    :return: whatever the connection's ``create_service`` returns
    """
    create_service = self._connection.create_service
    return create_service(self.flat_graph(), type=type, **kwargs)


def _get_spatial_extent_from_load_collection(self):
    """
    Look up the spatial extent of a ``load_collection`` node in the flat process graph.

    :return: the ``spatial_extent`` argument of the first ``load_collection`` node
        that has all of "east", "west", "south" and "north"; ``None`` if there is no such node.
    """
    pg = self.flat_graph()
    required_keys = ["east", "west", "south", "north"]
    for node_id in pg:
        node = pg[node_id]
        if node["process_id"] != "load_collection":
            continue
        arguments = node["arguments"]
        if "spatial_extent" not in arguments:
            continue
        extent = arguments["spatial_extent"]
        if all(key in extent for key in required_keys):
            return extent
    return None
def preview(
    self,
    center: Union[Iterable, None] = None,
    zoom: Union[int, None] = None,
):
    """
    Creates a service with the process graph and displays a map widget. Only supports XYZ.

    When neither ``center`` nor ``zoom`` is given, the map is fitted to the spatial extent
    of a ``load_collection`` node in the process graph (if one with a complete extent is found).

    :param center: (optional) Map center. Default is (0,0).
    :param zoom: (optional) Zoom level of the map. Default is 1.

    :return: a ``Preview`` object, holding the ipyleaflet Map (``map``) and the displayed Service (``service``)

    :raises OpenEoClientException: if the backend does not list the "XYZ" service type
    :raises Exception: when not running in a Jupyter context, or when ``ipyleaflet`` is not installed

    .. warning:: experimental feature, subject to change.
    .. versionadded:: 0.19.0
    """
    if "XYZ" not in self.connection.list_service_types():
        raise OpenEoClientException("Backend does not support service type 'XYZ'.")

    if not in_jupyter_context():
        raise Exception("On-demand preview only supported in Jupyter notebooks!")
    try:
        import ipyleaflet
    except ImportError as e:
        # Chain explicitly, so the original import failure stays visible as the cause.
        raise Exception(
            "Additional modules must be installed for on-demand preview. Run `pip install openeo[jupyter]` or refer to the documentation."
        ) from e

    service = self.tiled_viewing_service("XYZ")
    service_metadata = service.describe_service()

    m = ipyleaflet.Map(
        # Explicit `None` checks: a truthiness test would override a valid `zoom=0`
        # and fails on array-like centers.
        center=(0, 0) if center is None else center,
        zoom=1 if zoom is None else zoom,
        scroll_wheel_zoom=True,
        basemap=ipyleaflet.basemaps.OpenStreetMap.Mapnik,
    )
    service_layer = ipyleaflet.TileLayer(url=service_metadata["url"])
    m.add(service_layer)

    if center is None and zoom is None:
        # No explicit view requested: zoom in on the extent of the loaded collection, if known.
        spatial_extent = self._get_spatial_extent_from_load_collection()
        if spatial_extent is not None:
            m.fit_bounds(
                [
                    [spatial_extent["south"], spatial_extent["west"]],
                    [spatial_extent["north"], spatial_extent["east"]],
                ]
            )

    class Preview:
        """
        On-demand preview instance holding the associated XYZ service and ipyleaflet Map
        """

        def __init__(self, service: Service, ipyleaflet_map: ipyleaflet.Map):
            self.service = service
            self.map = ipyleaflet_map

        def _repr_html_(self):
            from IPython.display import display

            display(self.map)

        def delete_service(self):
            self.service.delete_service()

    return Preview(service, m)
+ + +
def execute_batch(
    self,
    outputfile: Optional[Union[str, pathlib.Path]] = None,
    out_format: Optional[str] = None,
    *,
    title: Optional[str] = None,
    description: Optional[str] = None,
    plan: Optional[str] = None,
    budget: Optional[float] = None,
    print: typing.Callable[[str], None] = print,
    max_poll_interval: float = 60,
    connection_retry_interval: float = 30,
    additional: Optional[dict] = None,
    job_options: Optional[dict] = None,
    validate: Optional[bool] = None,
    auto_add_save_result: bool = True,
    # TODO: deprecate `format_options` as keyword arguments
    **format_options,
) -> BatchJob:
    """
    Evaluate the process graph by creating a batch job, and retrieving the results when it is finished.
    This method is mostly recommended if the batch job is expected to run in a reasonable amount of time.

    For very long-running jobs, you probably do not want to keep the client running.

    :param outputfile: The path of a file to which a result can be written
    :param out_format: (optional) File format to use for the job result.
    :param title: passed on to :py:meth:`create_job`
    :param description: passed on to :py:meth:`create_job`
    :param plan: passed on to :py:meth:`create_job`
    :param budget: passed on to :py:meth:`create_job`
    :param print: passed on to the job's ``run_synchronous``
    :param max_poll_interval: passed on to the job's ``run_synchronous``
    :param connection_retry_interval: passed on to the job's ``run_synchronous``
    :param additional: additional (top-level) properties to set in the request body
    :param job_options: dictionary of job options to pass to the backend
        (under top-level property "job_options")
    :param validate: Optional toggle to enable/prevent validation of the process graphs before execution
        (overruling the connection's ``auto_validate`` setting).
    :param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet.
    :param format_options: remaining keyword arguments, used as file format options.
        A ``format`` entry is used as ``out_format`` when that one is not set.
    :return: the result of the job's ``run_synchronous`` call

    .. versionchanged:: 0.32.0
        Added ``auto_add_save_result`` option

    .. versionadded:: 0.36.0
        Added argument ``additional``.
    """
    # TODO: start showing deprecation warnings about these inconsistent argument names
    if not out_format and "format" in format_options:
        # align with 'download' call arg name
        out_format = format_options["format"]

    # TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ...
    if auto_add_save_result:
        cube = _ensure_save_result(
            cube=self,
            format=out_format,
            options=format_options,
            weak_format=guess_format(outputfile) if outputfile else None,
            default_format=self._DEFAULT_RASTER_FORMAT,
            method="DataCube.execute_batch()",
        )
    else:
        cube = self

    # `save_result` handling is already done above, so it is switched off for `create_job`.
    job = cube.create_job(
        title=title,
        description=description,
        plan=plan,
        budget=budget,
        additional=additional,
        job_options=job_options,
        validate=validate,
        auto_add_save_result=False,
    )
    return job.run_synchronous(
        outputfile=outputfile,
        print=print,
        max_poll_interval=max_poll_interval,
        connection_retry_interval=connection_retry_interval,
    )
+ + +
def create_job(
    self,
    out_format: Optional[str] = None,
    *,
    title: Optional[str] = None,
    description: Optional[str] = None,
    plan: Optional[str] = None,
    budget: Optional[float] = None,
    additional: Optional[dict] = None,
    job_options: Optional[dict] = None,
    validate: Optional[bool] = None,
    auto_add_save_result: bool = True,
    # TODO: avoid `format_options` as keyword arguments
    **format_options,
) -> BatchJob:
    """
    Send the datacube's process graph as a batch job to the back-end
    and return a :py:class:`~openeo.rest.job.BatchJob` instance.

    Note that the batch job will just be created at the back-end,
    it still needs to be started and tracked explicitly.
    Use :py:meth:`execute_batch` instead to have the openEO Python client take care of that job management.

    :param out_format: output file format.
    :param title: job title
    :param description: job description
    :param plan: The billing plan to process and charge the job with
    :param budget: Maximum budget to be spent on executing the job.
        Note that some backends do not honor this limit.
    :param additional: additional (top-level) properties to set in the request body
    :param job_options: dictionary of job options to pass to the backend
        (under top-level property "job_options")
    :param validate: Optional toggle to enable/prevent validation of the process graphs before execution
        (overruling the connection's ``auto_validate`` setting).
    :param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet.
    :param format_options: remaining keyword arguments, used as file format options

    :return: Created job.

    .. versionadded:: 0.32.0
        Added ``auto_add_save_result`` option

    .. versionadded:: 0.36.0
        Added ``additional`` argument.
    """
    # TODO: add option to also automatically start the job?
    # TODO: avoid using all kwargs as format_options
    # TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ...
    if auto_add_save_result:
        cube = _ensure_save_result(
            cube=self,
            format=out_format,
            # No format options at all is passed on as `None` instead of an empty dict.
            options=format_options or None,
            default_format=self._DEFAULT_RASTER_FORMAT,
            method="DataCube.create_job()",
        )
    else:
        cube = self

    return self._connection.create_job(
        process_graph=cube.flat_graph(),
        title=title,
        description=description,
        plan=plan,
        budget=budget,
        validate=validate,
        additional=additional,
        job_options=job_options,
    )
# Legacy alias: keeps `create_job` reachable under its former name ``send_job``
# (registered through `legacy_alias` with ``since="0.10.0"``).
send_job = legacy_alias(create_job, name="send_job", since="0.10.0")
def save_user_defined_process(
    self,
    user_defined_process_id: str,
    public: bool = False,
    summary: Optional[str] = None,
    description: Optional[str] = None,
    returns: Optional[dict] = None,
    categories: Optional[List[str]] = None,
    examples: Optional[List[dict]] = None,
    links: Optional[List[dict]] = None,
) -> RESTUserDefinedProcess:
    """
    Save this process graph in the backend as a user-defined process for the authenticated user.

    :param user_defined_process_id: unique identifier for the process
    :param public: visible to other users?
    :param summary: A short summary of what the process does.
    :param description: Detailed description to explain the entity.
        CommonMark 0.29 syntax MAY be used for rich text representation.
    :param returns: Description and schema of the return value.
    :param categories: A list of categories.
    :param examples: A list of examples.
    :param links: A list of links.
    :return: a RESTUserDefinedProcess instance
    """
    save = self._connection.save_user_defined_process
    return save(
        user_defined_process_id=user_defined_process_id,
        process_graph=self.flat_graph(),
        public=public,
        summary=summary,
        description=description,
        returns=returns,
        categories=categories,
        examples=examples,
        links=links,
    )
+ + +
def execute(self, *, validate: Optional[bool] = None, auto_decode: bool = True) -> Union[dict, requests.Response]:
    """
    Execute the process graph synchronously and return the result.
    If the result is a JSON object, it will be parsed.

    :param validate: Optional toggle to enable/prevent validation of the process graphs before execution
        (overruling the connection's ``auto_validate`` setting).
    :param auto_decode: Boolean flag to enable/disable automatic JSON decoding of the response. Defaults to True.

    :return: parsed JSON response as a dict if auto_decode is True, otherwise response object
    """
    # TODO: deprecate this. It's ill-defined how to "execute" a data cube without downloading it.
    run = self._connection.execute
    return run(self.flat_graph(), validate=validate, auto_decode=auto_decode)
+ + +
@staticmethod
@deprecated(reason="Use :py:func:`openeo.udf.run_code.execute_local_udf` instead", version="0.7.0")
def execute_local_udf(udf: str, datacube: Union[str, 'xarray.DataArray', 'XarrayDataCube'] = None, fmt='netcdf'):
    """
    Deprecated shortcut that forwards to :py:func:`openeo.udf.run_code.execute_local_udf`.

    :param udf: passed on as ``udf``
    :param datacube: passed on as ``datacube``
    :param fmt: passed on as ``fmt``
    :return: the result of :py:func:`openeo.udf.run_code.execute_local_udf`
    """
    # Imported at call time, as in the original; presumably to keep the UDF tooling optional at module import.
    from openeo.udf.run_code import execute_local_udf as run_local_udf

    return run_local_udf(udf=udf, datacube=datacube, fmt=fmt)
+ + +
@openeo_process
def ard_normalized_radar_backscatter(
    self, elevation_model: str = None, contributing_area=False,
    ellipsoid_incidence_angle: bool = False, noise_removal: bool = True
) -> DataCube:
    """
    Compute CARD4L compliant backscatter (gamma0) from SAR input.
    This method is a variant of :py:meth:`~openeo.rest.datacube.DataCube.sar_backscatter`,
    with restricted parameters to generate backscatter according to CARD4L specifications.

    Note that backscatter computation may require instrument specific metadata
    that is tightly coupled to the original SAR products.
    As a result, this process may only work in combination with loading data from specific collections,
    not with general data cubes.

    :param elevation_model: The digital elevation model to use.
        Set to None (the default) to allow the back-end to choose,
        which will improve portability, but reduce reproducibility.
    :param contributing_area: If set to `true`, a DEM-based local contributing area band
        named `contributing_area` is added. The values are given in square meters.
    :param ellipsoid_incidence_angle: If set to `True`, an ellipsoidal incidence angle band
        named `ellipsoid_incidence_angle` is added. The values are given in degrees.
    :param noise_removal: If set to `false`, no noise removal is applied.
        Defaults to `True`, which removes noise.

    :return: Backscatter values expressed as gamma0.
        The data returned is CARD4L compliant and contains metadata.
        By default, the backscatter values are given in linear scale.
    """
    arguments = {
        "data": THIS,
        "elevation_model": elevation_model,
        "contributing_area": contributing_area,
        "ellipsoid_incidence_angle": ellipsoid_incidence_angle,
        "noise_removal": noise_removal,
    }
    return self.process(process_id="ard_normalized_radar_backscatter", arguments=arguments)
+ + +
@openeo_process
def sar_backscatter(
    self,
    coefficient: Union[str, None] = "gamma0-terrain",
    elevation_model: Union[str, None] = None,
    mask: bool = False,
    contributing_area: bool = False,
    local_incidence_angle: bool = False,
    ellipsoid_incidence_angle: bool = False,
    noise_removal: bool = True,
    options: Optional[dict] = None
) -> DataCube:
    """
    Compute backscatter from SAR input.

    Note that backscatter computation may require instrument specific metadata that is tightly coupled to the
    original SAR products. As a result, this process may only work in combination with loading data from
    specific collections, not with general data cubes.

    :param coefficient: Select the radiometric correction coefficient.
        The following options are available:

        - `"beta0"`: radar brightness
        - `"sigma0-ellipsoid"`: ground area computed with ellipsoid earth model
        - `"sigma0-terrain"`: ground area computed with terrain earth model
        - `"gamma0-ellipsoid"`: ground area computed with ellipsoid earth model in sensor line of sight
        - `"gamma0-terrain"`: ground area computed with terrain earth model in sensor line of sight (default)
        - `None`: non-normalized backscatter
    :param elevation_model: The digital elevation model to use. Set to `None` (the default) to allow
        the back-end to choose, which will improve portability, but reduce reproducibility.
    :param mask: If set to `true`, a data mask is added to the bands with the name `mask`.
        It indicates which values are valid (1), invalid (0) or contain no-data (null).
    :param contributing_area: If set to `true`, a DEM-based local contributing area band named `contributing_area`
        is added. The values are given in square meters.
    :param local_incidence_angle: If set to `true`, a DEM-based local incidence angle band named
        `local_incidence_angle` is added. The values are given in degrees.
    :param ellipsoid_incidence_angle: If set to `true`, an ellipsoidal incidence angle band named
        `ellipsoid_incidence_angle` is added. The values are given in degrees.
    :param noise_removal: If set to `false`, no noise removal is applied. Defaults to `true`, which removes noise.
    :param options: dictionary with additional (backend-specific) options.
    :return: the data cube resulting from the ``sar_backscatter`` process
    :raises OpenEoClientException: if ``coefficient`` is not one of the supported options

    .. versionadded:: 0.4.9
    .. versionchanged:: 0.4.10 replace `orthorectify` and `rtc` arguments with `coefficient`.
    """
    # Validate client-side, before anything is added to the process graph.
    coefficient_options = [
        "beta0", "sigma0-ellipsoid", "sigma0-terrain", "gamma0-ellipsoid", "gamma0-terrain", None
    ]
    if coefficient not in coefficient_options:
        message = "Invalid `sar_backscatter` coefficient {c!r}. Should be one of {o}".format(
            c=coefficient, o=coefficient_options
        )
        raise OpenEoClientException(message)

    arguments = {
        "data": THIS,
        "coefficient": coefficient,
        "elevation_model": elevation_model,
        "mask": mask,
        "contributing_area": contributing_area,
        "local_incidence_angle": local_incidence_angle,
        "ellipsoid_incidence_angle": ellipsoid_incidence_angle,
        "noise_removal": noise_removal,
    }
    # Backend-specific options are only included when actually given.
    if options:
        arguments["options"] = options
    return self.process(process_id="sar_backscatter", arguments=arguments)
+ + +
+[docs] + @openeo_process + def fit_curve(self, parameters: list, function: Union[str, PGNode, typing.Callable], dimension: str): + """ + Use non-linear least squares to fit a model function `y = f(x, parameters)` to data. + + The process throws an `InvalidValues` exception if invalid values are encountered. + Invalid values are finite numbers (see also ``is_valid()``). + + .. warning:: experimental process: not generally supported, API subject to change. + https://github.com/Open-EO/openeo-processes/pull/240 + + :param parameters: + :param function: "child callback" function, see :ref:`callbackfunctions` + :param dimension: + """ + # TODO: does this return a `DataCube`? Shouldn't it just return an array (wrapper)? + return self.process( + process_id="fit_curve", + arguments={ + "data": THIS, + "parameters": parameters, + "function": build_child_callback(function, parent_parameters=["x", "parameters"]), + "dimension": dimension, + }, + )
+ + +
+[docs] + @openeo_process + def predict_curve( + self, parameters: list, function: Union[str, PGNode, typing.Callable], dimension: str, + labels=None + ): + """ + Predict values using a model function and pre-computed parameters. + + .. warning:: experimental process: not generally supported, API subject to change. + https://github.com/Open-EO/openeo-processes/pull/240 + + :param parameters: + :param function: "child callback" function, see :ref:`callbackfunctions` + :param dimension: + """ + return self.process( + process_id="predict_curve", + arguments={ + "data": THIS, + "parameters": parameters, + "function": build_child_callback(function, parent_parameters=["x", "parameters"]), + "dimension": dimension, + "labels": labels, + }, + )
+ + +
+[docs] + @openeo_process(mode="reduce_dimension") + def predict_random_forest(self, model: Union[str, BatchJob, MlModel], dimension: str = "bands"): + """ + Apply ``reduce_dimension`` process with a ``predict_random_forest`` reducer. + + :param model: a reference to a trained model, one of + + - a :py:class:`~openeo.rest.mlmodel.MlModel` instance (e.g. loaded from :py:meth:`Connection.load_ml_model`) + - a :py:class:`~openeo.rest.job.BatchJob` instance of a batch job that saved a single random forest model + - a job id (``str``) of a batch job that saved a single random forest model + - a STAC item URL (``str``) to load the random forest from. + (The STAC Item must implement the `ml-model` extension.) + :param dimension: dimension along which to apply the ``reduce_dimension`` process. + + .. versionadded:: 0.10.0 + """ + if not isinstance(model, MlModel): + model = MlModel.load_ml_model(connection=self.connection, id=model) + reducer = PGNode( + process_id="predict_random_forest", data={"from_parameter": "data"}, model={"from_parameter": "context"} + ) + return self.reduce_dimension(dimension=dimension, reducer=reducer, context=model)
+ + +
+[docs] + @openeo_process + def dimension_labels(self, dimension: str) -> DataCube: + """ + Gives all labels for a dimension in the data cube. The labels have the same order as in the data cube. + + :param dimension: The name of the dimension to get the labels for. + """ + if self._do_metadata_normalization(): + dimension_names = self.metadata.dimension_names() + if dimension_names and dimension not in dimension_names: + raise ValueError(f"Invalid dimension name {dimension!r}, should be one of {dimension_names}") + return self.process(process_id="dimension_labels", arguments={"data": THIS, "dimension": dimension})
+ + +
+[docs] + @openeo_process + def flatten_dimensions(self, dimensions: List[str], target_dimension: str, label_separator: Optional[str] = None): + """ + Combines multiple given dimensions into a single dimension by flattening the values + and merging the dimension labels with the given `label_separator`. Non-string dimension labels will + be converted to strings. This process is the opposite of the process :py:meth:`unflatten_dimension()` + but executing both processes subsequently doesn't necessarily create a data cube that + is equal to the original data cube. + + :param dimensions: The names of the dimension to combine. + :param target_dimension: The name of a target dimension with a single dimension label to replace. + :param label_separator: The string that will be used as a separator for the concatenated dimension labels. + :return: A data cube with the new shape. + + .. warning:: experimental process: not generally supported, API subject to change. + .. versionadded:: 0.10.0 + """ + return self.process( + process_id="flatten_dimensions", + arguments=dict_no_none( + data=THIS, + dimensions=dimensions, + target_dimension=target_dimension, + label_separator=label_separator, + ), + )
+ + +
+[docs] + @openeo_process + def unflatten_dimension(self, dimension: str, target_dimensions: List[str], label_separator: Optional[str] = None): + """ + Splits a single dimension into multiple dimensions by systematically extracting values and splitting + the dimension labels by the given `label_separator`. + This process is the opposite of the process :py:meth:`flatten_dimensions()` but executing both processes + subsequently doesn't necessarily create a data cube that is equal to the original data cube. + + :param dimension: The name of the dimension to split. + :param target_dimensions: The names of the target dimensions. + :param label_separator: The string that will be used as a separator to split the dimension labels. + :return: A data cube with the new shape. + + .. warning:: experimental process: not generally supported, API subject to change. + .. versionadded:: 0.10.0 + """ + return self.process( + process_id="unflatten_dimension", + arguments=dict_no_none( + data=THIS, + dimension=dimension, + target_dimensions=target_dimensions, + label_separator=label_separator, + ), + )
+
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/rest/graph_building.html b/_modules/openeo/rest/graph_building.html new file mode 100644 index 000000000..2b56fcc6d --- /dev/null +++ b/_modules/openeo/rest/graph_building.html @@ -0,0 +1,208 @@ + + + + + + + openeo.rest.graph_building — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.rest.graph_building

+"""
+Public openEO process graph building utilities
+'''''''''''''''''''''''''''''''''''''''''''''''
+
+"""
+from __future__ import annotations
+
+from typing import Optional
+
+from openeo.internal.graph_building import PGNode, _FromNodeMixin
+from openeo.processes import ProcessBuilder
+
+
+
+[docs] +class CollectionProperty(_FromNodeMixin): + """ + Helper object to easily create simple collection metadata property filters + to be used with :py:meth:`Connection.load_collection() <openeo.rest.connection.Connection.load_collection>`. + + .. note:: This class should not be used directly by end user code. + Use the :py:func:`~openeo.rest.graph_building.collection_property` factory instead. + + .. warning:: this is an experimental feature, naming might change. + """ + + def __init__(self, name: str, _builder: Optional[ProcessBuilder] = None): + self.name = name + self._builder = _builder or ProcessBuilder(pgnode={"from_parameter": "value"}) + + def from_node(self) -> PGNode: + return self._builder.from_node() + + def __eq__(self, other) -> CollectionProperty: + return CollectionProperty(self.name, _builder=self._builder == other) + + def __ne__(self, other) -> CollectionProperty: + return CollectionProperty(self.name, _builder=self._builder != other) + + def __gt__(self, other) -> CollectionProperty: + return CollectionProperty(self.name, _builder=self._builder > other) + + def __ge__(self, other) -> CollectionProperty: + return CollectionProperty(self.name, _builder=self._builder >= other) + + def __lt__(self, other) -> CollectionProperty: + return CollectionProperty(self.name, _builder=self._builder < other) + + def __le__(self, other) -> CollectionProperty: + return CollectionProperty(self.name, _builder=self._builder <= other)
+ + + +
+[docs] +def collection_property(name: str) -> CollectionProperty: + """ + Helper to easily create simple collection metadata property filters + to be used with :py:meth:`Connection.load_collection() <openeo.rest.connection.Connection.load_collection>`. + + Usage example: + + .. code-block:: python + + from openeo import collection_property + ... + + connection.load_collection( + ... + properties=[ + collection_property("eo:cloud_cover") <= 75, + collection_property("platform") == "Sentinel-2B", + ] + ) + + .. warning:: this is an experimental feature, naming might change. + + .. versionadded:: 0.26.0 + + :param name: name of the collection property to filter on + :return: an object that supports operators like ``<=``, ``==`` to easily build simple property filters. + """ + return CollectionProperty(name=name)
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/rest/job.html b/_modules/openeo/rest/job.html new file mode 100644 index 000000000..88e3626e7 --- /dev/null +++ b/_modules/openeo/rest/job.html @@ -0,0 +1,751 @@ + + + + + + + openeo.rest.job — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.rest.job

+from __future__ import annotations
+
+import datetime
+import json
+import logging
+import time
+import typing
+from pathlib import Path
+from typing import Dict, List, Optional, Union
+
+import requests
+
+from openeo.api.logs import LogEntry, log_level_name, normalize_log_level
+from openeo.internal.documentation import openeo_endpoint
+from openeo.internal.jupyter import (
+    VisualDict,
+    VisualList,
+    render_component,
+    render_error,
+)
+from openeo.internal.warnings import deprecated, legacy_alias
+from openeo.rest import (
+    DEFAULT_DOWNLOAD_CHUNK_SIZE,
+    JobFailedException,
+    OpenEoApiError,
+    OpenEoApiPlainError,
+    OpenEoClientException,
+)
+from openeo.util import ensure_dir
+
+if typing.TYPE_CHECKING:
+    # Imports for type checking only (circular import issue at runtime).
+    from openeo.rest.connection import Connection
+
+logger = logging.getLogger(__name__)
+
+
+DEFAULT_JOB_RESULTS_FILENAME = "job-results.json"
+
+
+
+[docs] +class BatchJob: + """ + Handle for an openEO batch job, allowing it to describe, start, cancel, inspect results, etc. + + .. versionadded:: 0.11.0 + This class originally had the more cryptic name :py:class:`RESTJob`, + which is still available as legacy alias, + but :py:class:`BatchJob` is recommended since version 0.11.0. + + """ + + # TODO #425 method to bootstrap `load_stac` directly from a BatchJob object + + def __init__(self, job_id: str, connection: Connection): + self.job_id = job_id + """Unique identifier of the batch job (string).""" + + self.connection = connection + + def __repr__(self): + return '<{c} job_id={i!r}>'.format(c=self.__class__.__name__, i=self.job_id) + + def _repr_html_(self): + data = self.describe() + currency = self.connection.capabilities().currency() + return render_component('job', data=data, parameters={'currency': currency}) + +
+[docs] + @openeo_endpoint("GET /jobs/{job_id}") + def describe(self) -> dict: + """ + Get detailed metadata about a submitted batch job + (title, process graph, status, progress, ...). + + .. versionadded:: 0.20.0 + This method was previously called :py:meth:`describe_job`. + """ + return self.connection.get(f"/jobs/{self.job_id}", expected_status=200).json()
+ + + describe_job = legacy_alias(describe, name="describe_job", since="0.20.0", mode="soft") + +
+[docs] + def status(self) -> str: + """ + Get the status of the batch job + + :return: batch job status, one of "created", "queued", "running", "canceled", "finished" or "error". + """ + return self.describe().get("status", "N/A")
+ + +
+[docs] + @openeo_endpoint("DELETE /jobs/{job_id}") + def delete(self): + """ + Delete this batch job. + + .. versionadded:: 0.20.0 + This method was previously called :py:meth:`delete_job`. + """ + self.connection.delete(f"/jobs/{self.job_id}", expected_status=204)
+ + + delete_job = legacy_alias(delete, name="delete_job", since="0.20.0", mode="soft") + +
+[docs] + @openeo_endpoint("GET /jobs/{job_id}/estimate") + def estimate(self): + """Calculate time/cost estimate for a job.""" + data = self.connection.get( + f"/jobs/{self.job_id}/estimate", expected_status=200 + ).json() + currency = self.connection.capabilities().currency() + return VisualDict('job-estimate', data=data, parameters={'currency': currency})
+ + + estimate_job = legacy_alias(estimate, name="estimate_job", since="0.20.0", mode="soft") + +
+[docs] + @openeo_endpoint("POST /jobs/{job_id}/results") + def start(self) -> BatchJob: + """ + Start this batch job. + + :return: Started batch job + + .. versionadded:: 0.20.0 + This method was previously called :py:meth:`start_job`. + """ + self.connection.post(f"/jobs/{self.job_id}/results", expected_status=202) + return self
+ + + start_job = legacy_alias(start, name="start_job", since="0.20.0", mode="soft") + +
+[docs] + @openeo_endpoint("DELETE /jobs/{job_id}/results") + def stop(self): + """ + Stop this batch job. + + .. versionadded:: 0.20.0 + This method was previously called :py:meth:`stop_job`. + """ + self.connection.delete(f"/jobs/{self.job_id}/results", expected_status=204)
+ + + stop_job = legacy_alias(stop, name="stop_job", since="0.20.0", mode="soft") + +
+[docs] + def get_results_metadata_url(self, *, full: bool = False) -> str: + """Get results metadata URL""" + url = f"/jobs/{self.job_id}/results" + if full: + url = self.connection.build_url(url) + return url
+ + +
+[docs] + @deprecated("Use :py:meth:`~BatchJob.get_results` instead.", version="0.4.10") + def list_results(self) -> dict: + """Get batch job results metadata.""" + return self.get_results().get_metadata()
+ + +
+[docs] + def download_result(self, target: Union[str, Path] = None) -> Path: + """ + Download single job result to the target file path or into folder (current working dir by default). + + Fails if there are multiple result files. + + :param target: String or path where the file should be downloaded to. + """ + return self.get_results().download_file(target=target)
+ + +
+[docs] + @deprecated( + "Instead use :py:meth:`BatchJob.get_results` and the more flexible download functionality of :py:class:`JobResults`", + version="0.4.10") + def download_results(self, target: Union[str, Path] = None) -> Dict[Path, dict]: + """ + Download all job result files into given folder (current working dir by default). + + The names of the files are taken directly from the backend. + + :param target: String/path, folder where to put the result files. + :return: file_list: Dict containing the downloaded file path as value and asset metadata + """ + return self.get_result().download_files(target)
+ + +
+[docs] + @deprecated("Use :py:meth:`BatchJob.get_results` instead.", version="0.4.10") + def get_result(self): + return _Result(self)
+ + +
+[docs] + def get_results(self) -> JobResults: + """ + Get handle to batch job results for result metadata inspection or downloading resulting assets. + + .. versionadded:: 0.4.10 + """ + return JobResults(job=self)
+ + +
+[docs] + def logs( + self, offset: Optional[str] = None, level: Optional[Union[str, int]] = None + ) -> List[LogEntry]: + """Retrieve job logs. + + :param offset: The last identifier (property ``id`` of a LogEntry) the client has received. + + If provided, the back-ends only sends the entries that occurred after the specified identifier. + If not provided or empty, start with the first entry. + + Defaults to None. + + :param level: Minimum log level to retrieve. + + You can use either constants from Python's standard module ``logging`` + or their names (case-insensitive). + + For example: + ``logging.INFO``, ``"info"`` or ``"INFO"`` can all be used to show the messages + for level ``logging.INFO`` and above, i.e. also ``logging.WARNING`` and + ``logging.ERROR`` will be included. + + Default is to show all log levels, in other words ``logging.DEBUG``. + This is also the result when you explicitly pass log_level=None or log_level="". + + :return: A list containing the log entries for the batch job. + """ + url = f"/jobs/{self.job_id}/logs" + params = {} + if offset is not None: + params["offset"] = offset + if level is not None: + params["level"] = log_level_name(level) + response = self.connection.get(url, params=params, expected_status=200) + logs = response.json()["logs"] + + # Only filter logs when specified. + # We should still support client-side log_level filtering because not all backends + # support the minimum log level parameter. + if level is not None: + log_level = normalize_log_level(level) + logs = ( + log + for log in logs + if normalize_log_level(log.get("level")) >= log_level + ) + + entries = [LogEntry(log) for log in logs] + return VisualList("logs", data=entries)
+ + +
+[docs] + def run_synchronous( + self, outputfile: Union[str, Path, None] = None, + print=print, max_poll_interval=60, connection_retry_interval=30 + ) -> BatchJob: + """Start the job, wait for it to finish and download result""" + self.start_and_wait( + print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval + ) + # TODO #135 support multi file result sets too? + if outputfile is not None: + self.download_result(outputfile) + return self
+ + +
+[docs] + def start_and_wait( + self, print=print, max_poll_interval: int = 60, connection_retry_interval: int = 30, soft_error_max=10 + ) -> BatchJob: + """ + Start the batch job, poll its status and wait till it finishes (or fails) + + :param print: print/logging function to show progress/status + :param max_poll_interval: maximum number of seconds to sleep between status polls + :param connection_retry_interval: how long to wait when status poll failed due to connection issue + :param soft_error_max: maximum number of soft errors (e.g. temporary connection glitches) to allow + :return: + """ + # TODO rename `connection_retry_interval` to something more generic? + start_time = time.time() + + def elapsed() -> str: + return str(datetime.timedelta(seconds=time.time() - start_time)).rsplit(".")[0] + + def print_status(msg: str): + print("{t} Job {i!r}: {m}".format(t=elapsed(), i=self.job_id, m=msg)) + + # TODO: make `max_poll_interval`, `connection_retry_interval` class constants or instance properties? + print_status("send 'start'") + self.start() + + # TODO: also add `wait` method so you can track a job that already has started explicitly + # or just rename this method to `wait` and automatically do start if not started yet? + + # Start with fast polling. + poll_interval = min(5, max_poll_interval) + status = None + _soft_error_count = 0 + + def soft_error(message: str): + """Non breaking error (unless we had too much of them)""" + nonlocal _soft_error_count + _soft_error_count += 1 + if _soft_error_count > soft_error_max: + raise OpenEoClientException("Excessive soft errors") + print_status(message) + time.sleep(connection_retry_interval) + + while True: + # TODO: also allow a hard time limit on this infinite poll loop? 
+ try: + job_info = self.describe() + except requests.ConnectionError as e: + soft_error("Connection error while polling job status: {e}".format(e=e)) + continue + except OpenEoApiPlainError as e: + if e.http_status_code in [502, 503]: + soft_error("Service availability error while polling job status: {e}".format(e=e)) + continue + else: + raise + + status = job_info.get("status", "N/A") + progress = '{p}%'.format(p=job_info["progress"]) if "progress" in job_info else "N/A" + print_status("{s} (progress {p})".format(s=status, p=progress)) + if status not in ('submitted', 'created', 'queued', 'running'): + break + + # Sleep for next poll (and adaptively make polling less frequent) + time.sleep(poll_interval) + poll_interval = min(1.25 * poll_interval, max_poll_interval) + + if status != "finished": + # TODO: allow to disable this printing logs (e.g. in non-interactive contexts)? + # TODO: render logs jupyter-aware in a notebook context? + print(f"Your batch job {self.job_id!r} failed. Error logs:") + print(self.logs(level=logging.ERROR)) + print( + f"Full logs can be inspected in an openEO (web) editor or with `connection.job({self.job_id!r}).logs()`." + ) + raise JobFailedException( + f"Batch job {self.job_id!r} didn't finish successfully. Status: {status} (after {elapsed()}).", + job=self, + ) + + return self
+
+ + + +
+[docs] +@deprecated(reason="Use :py:class:`BatchJob` instead", version="0.11.0") +class RESTJob(BatchJob): + """ + Legacy alias for :py:class:`BatchJob`. + """
+ + + +
+[docs] +class ResultAsset: + """ + Result asset of a batch job (e.g. a GeoTIFF or JSON file) + + .. versionadded:: 0.4.10 + """ + + def __init__(self, job: BatchJob, name: str, href: str, metadata: dict): + self.job = job + + self.name = name + """Asset name as advertised by the backend.""" + + self.href = href + """Download URL of the asset.""" + + self.metadata = metadata + """Asset metadata provided by the backend, possibly containing keys "type" (for media type), "roles", "title", "description".""" + + def __repr__(self): + return "<ResultAsset {n!r} (type {t}) at {h!r}>".format( + n=self.name, t=self.metadata.get("type", "unknown"), h=self.href + ) + +
+[docs] + def download( + self, target: Optional[Union[Path, str]] = None, *, chunk_size: int = DEFAULT_DOWNLOAD_CHUNK_SIZE + ) -> Path: + """ + Download asset to given location + + :param target: download target path. Can be an existing folder + (in which case the filename advertised by backend will be used) + or full file name. By default, the working directory will be used. + :param chunk_size: chunk size for streaming response. + """ + target = Path(target or Path.cwd()) + if target.is_dir(): + target = target / self.name + ensure_dir(target.parent) + logger.info("Downloading Job result asset {n!r} from {h!s} to {t!s}".format(n=self.name, h=self.href, t=target)) + response = self._get_response(stream=True) + with target.open("wb") as f: + for block in response.iter_content(chunk_size=chunk_size): + f.write(block) + return target
+ + + def _get_response(self, stream=True) -> requests.Response: + return self.job.connection.get(self.href, stream=stream) + +
+[docs] + def load_json(self) -> dict: + """Load asset in memory and parse as JSON.""" + if not (self.name.lower().endswith(".json") or self.metadata.get("type") == "application/json"): + logger.warning("Asset might not be JSON") + return self._get_response().json()
+ + +
+[docs] + def load_bytes(self) -> bytes: + """Load asset in memory as raw bytes.""" + return self._get_response().content
+
+ + + # TODO: more `load` methods e.g.: load GTiff asset directly as numpy array + + +class MultipleAssetException(OpenEoClientException): + pass + + +
+[docs] +class JobResults: + """ + Results of a batch job: listing of one or more output files (assets) + and some metadata. + + .. versionadded:: 0.4.10 + """ + + def __init__(self, job: BatchJob): + self._job = job + self._results = None + + def __repr__(self): + return "<JobResults for job {j!r}>".format(j=self._job.job_id) + + def get_job_id(self) -> str: + return self._job.job_id + + def _repr_html_(self): + try: + response = self.get_metadata() + return render_component("batch-job-result", data = response) + except OpenEoApiError as error: + return render_error(error) + +
+[docs] + def get_metadata(self, force=False) -> dict: + """Get batch job results metadata (parsed JSON)""" + if self._results is None or force: + self._results = self._job.connection.get( + self._job.get_results_metadata_url(), expected_status=200 + ).json() + return self._results
+ + + # TODO: provide methods for `stac_version`, `id`, `geometry`, `properties`, `links`, ...? + +
+[docs] + def get_assets(self) -> List[ResultAsset]: + """ + Get all assets from the job results. + """ + # TODO: add arguments to filter on metadata, e.g. to only get assets of type "image/tiff" + metadata = self.get_metadata() + # API 1.0 style: dictionary mapping filenames to metadata dict (with at least a "href" field) + assets = metadata.get("assets", {}) + if not assets: + logger.warning("No assets found in job result metadata.") + return [ + ResultAsset(job=self._job, name=name, href=asset["href"], metadata=asset) + for name, asset in assets.items() + ]
+ + +
+[docs] + def get_asset(self, name: str = None) -> ResultAsset: + """ + Get single asset by name or without name if there is only one. + """ + # TODO: also support getting a single asset by type or role? + assets = self.get_assets() + if len(assets) == 0: + raise OpenEoClientException("No assets in result.") + if name is None: + if len(assets) == 1: + return assets[0] + else: + raise MultipleAssetException("Multiple result assets for job {j}: {a}".format( + j=self._job.job_id, a=[a.name for a in assets] + )) + else: + try: + return next(a for a in assets if a.name == name) + except StopIteration: + raise OpenEoClientException( + "No asset {n!r} in: {a}".format(n=name, a=[a.name for a in assets]) + )
+ + +
+[docs] + def download_file(self, target: Union[Path, str] = None, name: str = None) -> Path: + """ + Download single asset. Can be used when there is only one asset in the + :py:class:`JobResults`, or when the desired asset name is given explicitly. + + :param target: path to download to. Can be an existing directory + (in which case the filename advertised by backend will be used) + or full file name. By default, the working directory will be used. + :param name: asset name to download (not required when there is only one asset) + :return: path of downloaded asset + """ + try: + return self.get_asset(name=name).download(target=target) + except MultipleAssetException: + raise OpenEoClientException( + "Can not use `download_file` with multiple assets. Use `download_files` instead.")
+ + +
+[docs] + def download_files(self, target: Union[Path, str] = None, include_stac_metadata: bool = True) -> List[Path]: + """ + Download all assets to given folder. + + :param target: path to folder to download to (must be a folder if it already exists) + :param include_stac_metadata: whether to download the job result metadata as a STAC (JSON) file. + :return: list of paths to the downloaded assets. + """ + target = Path(target or Path.cwd()) + if target.exists() and not target.is_dir(): + raise OpenEoClientException(f"Target argument {target} exists but isn't a folder.") + ensure_dir(target) + + downloaded = [a.download(target) for a in self.get_assets()] + + if include_stac_metadata: + # TODO #184: convention for metadata file name? + metadata_file = target / DEFAULT_JOB_RESULTS_FILENAME + # TODO #184: rewrite references to locally downloaded assets? + metadata_file.write_text(json.dumps(self.get_metadata())) + downloaded.append(metadata_file) + + return downloaded
+
+ + + +@deprecated(reason="Use :py:class:`JobResults` instead", version="0.4.10") +class _Result: + """ + Wrapper around `JobResults` to adapt old deprecated "Result" API. + + .. deprecated:: 0.4.10 + """ + + # TODO: deprecated: remove this + + def __init__(self, job): + self.results = JobResults(job=job) + + def download_file(self, target: Union[str, Path] = None) -> Path: + return self.results.download_file(target=target) + + def download_files(self, target: Union[str, Path] = None) -> Dict[Path, dict]: + target = Path(target or Path.cwd()) + if target.exists() and not target.is_dir(): + raise OpenEoClientException(f"Target argument {target} exists but isn't a folder.") + return {a.download(target): a.metadata for a in self.results.get_assets()} + + def load_json(self) -> dict: + return self.results.get_asset().load_json() + + def load_bytes(self) -> bytes: + return self.results.get_asset().load_bytes() +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/rest/mlmodel.html b/_modules/openeo/rest/mlmodel.html new file mode 100644 index 000000000..1bd488618 --- /dev/null +++ b/_modules/openeo/rest/mlmodel.html @@ -0,0 +1,285 @@ + + + + + + + openeo.rest.mlmodel — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.rest.mlmodel

+from __future__ import annotations
+
+import logging
+import pathlib
+import typing
+from typing import Optional, Union
+
+from openeo.internal.documentation import openeo_process
+from openeo.internal.graph_building import PGNode
+from openeo.rest._datacube import _ProcessGraphAbstraction
+from openeo.rest.job import BatchJob
+
+if typing.TYPE_CHECKING:
+    # Imports for type checking only (circular import issue at runtime).
+    from openeo import Connection
+
+_log = logging.getLogger(__name__)
+
+
+
+[docs] +class MlModel(_ProcessGraphAbstraction): + """ + A machine learning model. + + It is the result of a training procedure, e.g. output of a ``fit_...`` process, + and can be used for prediction (classification or regression) with the corresponding ``predict_...`` process. + + .. versionadded:: 0.10.0 + """ + + def __init__(self, graph: PGNode, connection: Union[Connection, None]): + super().__init__(pgnode=graph, connection=connection) + +
+[docs] + def save_ml_model(self, options: Optional[dict] = None): + """ + Saves a machine learning model as part of a batch job. + + :param options: Additional parameters to create the file(s). + """ + pgnode = PGNode( + process_id="save_ml_model", + arguments={"data": self, "options": options or {}} + ) + return MlModel(graph=pgnode, connection=self._connection)
+ + +
+[docs] + @staticmethod + @openeo_process + def load_ml_model(connection: Connection, id: Union[str, BatchJob]) -> MlModel: + """ + Loads a machine learning model from a STAC Item. + + :param connection: connection object + :param id: STAC item reference, as URL, batch job (id) or user-uploaded file + :return: + + .. versionadded:: 0.10.0 + """ + if isinstance(id, BatchJob): + id = id.job_id + return MlModel(graph=PGNode(process_id="load_ml_model", id=id), connection=connection)
+ + +
+[docs] + def execute_batch( + self, + outputfile: Union[str, pathlib.Path], + *, + title: Optional[str] = None, + description: Optional[str] = None, + plan: Optional[str] = None, + budget: Optional[float] = None, + print=print, + max_poll_interval=60, + connection_retry_interval=30, + additional: Optional[dict] = None, + job_options: Optional[dict] = None, + ) -> BatchJob: + """ + Evaluate the process graph by creating a batch job, and retrieving the results when it is finished. + This method is mostly recommended if the batch job is expected to run in a reasonable amount of time. + + For very long running jobs, you probably do not want to keep the client running. + + :param job_options: + :param outputfile: The path of a file to which a result can be written + :param out_format: (optional) Format of the job result. + :param format_options: String Parameters for the job result format + :param additional: additional (top-level) properties to set in the request body + :param job_options: dictionary of job options to pass to the backend + (under top-level property "job_options") + + .. versionadded:: 0.36.0 + Added argument ``additional``. + """ + job = self.create_job( + title=title, + description=description, + plan=plan, + budget=budget, + additional=additional, + job_options=job_options, + ) + return job.run_synchronous( + # TODO #135 support multi file result sets too + outputfile=outputfile, + print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval + )
+ + +
+[docs] + def create_job( + self, + *, + title: Optional[str] = None, + description: Optional[str] = None, + plan: Optional[str] = None, + budget: Optional[float] = None, + additional: Optional[dict] = None, + job_options: Optional[dict] = None, + ) -> BatchJob: + """ + Sends a job to the backend and returns a ClientJob instance. + + :param title: job title + :param description: job description + :param plan: The billing plan to process and charge the job with + :param budget: Maximum budget to be spent on executing the job. + Note that some backends do not honor this limit. + :param additional: additional (top-level) properties to set in the request body + :param job_options: dictionary of job options to pass to the backend + (under top-level property "job_options") + :param format_options: String Parameters for the job result format + :return: Created job. + + .. versionadded:: 0.36.0 + Added argument ``additional``. + """ + # TODO: centralize `create_job` for `DataCube`, `VectorCube`, `MlModel`, ... + pg = self + if pg.result_node().process_id not in {"save_ml_model"}: + _log.warning("Process graph has no final `save_ml_model`. Adding it automatically.") + pg = pg.save_ml_model() + return self._connection.create_job( + process_graph=pg.flat_graph(), + title=title, + description=description, + plan=plan, + budget=budget, + additional=additional, + job_options=job_options, + )
+
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/rest/multiresult.html b/_modules/openeo/rest/multiresult.html new file mode 100644 index 000000000..7200694de --- /dev/null +++ b/_modules/openeo/rest/multiresult.html @@ -0,0 +1,241 @@ + + + + + + + openeo.rest.multiresult — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.rest.multiresult

+from __future__ import annotations
+
+from typing import Dict, List, Optional
+
+from openeo import BatchJob
+from openeo.internal.graph_building import FlatGraphableMixin, MultiLeafGraph
+from openeo.rest import OpenEoClientException
+from openeo.rest.connection import Connection, extract_connections
+
+
+
class MultiResult(FlatGraphableMixin):
    """
    Helper to create and run batch jobs with process graphs
    that contain multiple result nodes
    or, more generally speaking, multiple process graph "leaf" nodes.

    Provide multiple
    :py:class:`~openeo.rest.datacube.DataCube`/:py:class:`~openeo.rest.vectorcube.VectorCube`
    instances to the constructor,
    and start a batch job from that,
    for example as follows:

    .. code-block:: python

        from openeo import MultiResult

        cube1 = ...
        cube2 = ...
        multi_result = MultiResult([cube1, cube2])
        job = multi_result.create_job()

    .. seealso::

        :ref:`multi-result-process-graphs`

    .. versionadded:: 0.35.0
    """

    __slots__ = ("_multi_leaf_graph", "_connection")

    def __init__(self, leaves: List[FlatGraphableMixin], connection: Optional[Connection] = None):
        """
        Build a :py:class:`MultiResult` instance from multiple leaf nodes

        :param leaves: list of objects that can be
            converted to an openEO-style (flat) process graph representation,
            typically :py:class:`~openeo.rest.datacube.DataCube`
            or :py:class:`~openeo.rest.vectorcube.VectorCube` instances.
        :param connection: Optional connection to use for creating/starting batch jobs,
            for special use cases where the provided leaf instances
            are not already associated with a connection.
        """
        self._multi_leaf_graph = MultiLeafGraph(leaves=leaves)
        self._connection = self._extract_connection(leaves=leaves, connection=connection)

    @staticmethod
    def _extract_connection(leaves: List[FlatGraphableMixin], connection: Optional[Connection] = None) -> Connection:
        """
        Determine the single connection shared by the leaves and/or the explicitly given one.

        :raises OpenEoClientException: when no connection is found, or more than one distinct connection.
        """
        # A set collapses leaves that share the same connection.
        candidates = set(extract_connections(leaves))
        if connection:
            candidates.add(connection)

        if not candidates:
            raise OpenEoClientException("No connection in any of the MultiResult leaves")
        if len(candidates) > 1:
            raise OpenEoClientException("MultiResult with multiple different connections")
        (only_connection,) = candidates
        return only_connection

    def flat_graph(self) -> Dict[str, dict]:
        """Return the flat graph representation of the wrapped multi-leaf graph."""
        return self._multi_leaf_graph.flat_graph()

    def create_job(
        self,
        *,
        title: Optional[str] = None,
        description: Optional[str] = None,
        additional: Optional[dict] = None,
        job_options: Optional[dict] = None,
        validate: Optional[bool] = None,
    ) -> BatchJob:
        """
        Create a batch job for the multi-leaf process graph through the associated connection.

        All keyword arguments are passed on unchanged to the connection's ``create_job()``.
        """
        connection = self._connection
        return connection.create_job(
            process_graph=self._multi_leaf_graph,
            title=title,
            description=description,
            additional=additional,
            job_options=job_options,
            validate=validate,
        )

    def execute_batch(
        self,
        *,
        title: Optional[str] = None,
        description: Optional[str] = None,
        additional: Optional[dict] = None,
        job_options: Optional[dict] = None,
        validate: Optional[bool] = None,
    ) -> BatchJob:
        """
        Create a batch job (see :py:meth:`create_job`) and run it with ``run_synchronous()``.

        :return: the result of the job's ``run_synchronous()`` call
        """
        return self.create_job(
            title=title,
            description=description,
            additional=additional,
            job_options=job_options,
            validate=validate,
        ).run_synchronous()
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/rest/udp.html b/_modules/openeo/rest/udp.html new file mode 100644 index 000000000..19d7b0994 --- /dev/null +++ b/_modules/openeo/rest/udp.html @@ -0,0 +1,266 @@ + + + + + + + openeo.rest.udp — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.rest.udp

+from __future__ import annotations
+
+import typing
+from pathlib import Path
+from typing import List, Optional, Union
+
+from openeo.api.process import Parameter
+from openeo.internal.graph_building import FlatGraphableMixin, as_flat_graph
+from openeo.internal.jupyter import render_component
+from openeo.internal.processes.builder import ProcessBuilderBase
+from openeo.internal.warnings import deprecated
+from openeo.util import dict_no_none
+
+if typing.TYPE_CHECKING:
+    # Imports for type checking only (circular import issue at runtime).
+    from openeo.rest.connection import Connection
+
+
+
def build_process_dict(
    process_graph: Union[dict, FlatGraphableMixin, Path, List[FlatGraphableMixin]],
    process_id: Optional[str] = None,
    summary: Optional[str] = None,
    description: Optional[str] = None,
    parameters: Optional[List[Union[Parameter, dict]]] = None,
    returns: Optional[dict] = None,
    categories: Optional[List[str]] = None,
    examples: Optional[List[dict]] = None,
    links: Optional[List[dict]] = None,
) -> dict:
    """
    Build a dictionary describing a process with metadata (`process_graph`, `parameters`, `description`, ...)

    :param process_graph: dict or builder representing a process graph
    :param process_id: identifier of the process
    :param summary: short summary of what the process does
    :param description: detailed description
    :param parameters: list of process parameters (which have name, schema, default value, ...)
    :param returns: description and schema of what the process returns
    :param categories: list of categories
    :param examples: list of examples, may be used for unit tests
    :param links: list of links related to the process
    :return: dictionary in openEO "process graph with metadata" format
    """
    process = dict_no_none(
        process_graph=as_flat_graph(process_graph),
        id=process_id,
        summary=summary,
        description=description,
        returns=returns,
        categories=categories,
        examples=examples,
        links=links,
    )
    if parameters is None:
        # No "parameters" entry at all when nothing was specified.
        return process

    serialized = []
    for param in parameters:
        # Plain dicts are first turned into `Parameter` objects.
        if not isinstance(param, Parameter):
            param = Parameter(**param)
        serialized.append(param.to_dict())
    process["parameters"] = serialized
    return process
+ + + +
class RESTUserDefinedProcess:
    """
    Wrapper for a user-defined process stored (or to be stored) on an openEO back-end
    """

    def __init__(self, user_defined_process_id: str, connection: Connection):
        self.user_defined_process_id = user_defined_process_id
        self._connection = connection
        # Check for user-defined process support right away (delegated to the connection).
        self._connection.assert_user_defined_process_support()

    def _repr_html_(self):
        """Render the process metadata (as returned by :py:meth:`describe`) through ``render_component``."""
        return render_component(
            "process",
            data=self.describe(),
            parameters={"show-graph": True, "provide-download": False},
        )

    def store(
        self,
        process_graph: Union[dict, FlatGraphableMixin],
        parameters: Optional[List[Union[Parameter, dict]]] = None,
        public: bool = False,
        summary: Optional[str] = None,
        description: Optional[str] = None,
        returns: Optional[dict] = None,
        categories: Optional[List[str]] = None,
        examples: Optional[List[dict]] = None,
        links: Optional[List[dict]] = None,
    ):
        """Store a process graph and its metadata on the backend as a user-defined process"""
        process = build_process_dict(
            process_graph=process_graph,
            parameters=parameters,
            summary=summary,
            description=description,
            returns=returns,
            categories=categories,
            examples=examples,
            links=links,
        )

        # TODO: this "public" flag is not standardized yet EP-3609, https://github.com/Open-EO/openeo-api/issues/310
        process["public"] = public

        connection = self._connection
        connection._preflight_validation(pg_with_metadata={"process": process})
        endpoint = f"/process_graphs/{self.user_defined_process_id}"
        connection.put(path=endpoint, json=process, expected_status=200)

    @deprecated(
        "Use `store` instead. Method `update` is misleading: OpenEO API does not provide (partial) updates"
        " of user-defined processes, only fully overwriting 'store' operations.",
        version="0.4.11",
    )
    def update(
        self, process_graph: Union[dict, ProcessBuilderBase], parameters: List[Union[Parameter, dict]] = None,
        public: bool = False, summary: str = None, description: str = None
    ):
        # Deprecated entry point: simply delegates to `store`.
        self.store(
            process_graph=process_graph,
            parameters=parameters,
            public=public,
            summary=summary,
            description=description,
        )

    def describe(self) -> dict:
        """Get metadata of this user-defined process."""
        # TODO: parse the "parameters" to Parameter objects?
        endpoint = f"/process_graphs/{self.user_defined_process_id}"
        response = self._connection.get(path=endpoint)
        return response.json()

    def delete(self) -> None:
        """Remove user-defined process from back-end"""
        endpoint = f"/process_graphs/{self.user_defined_process_id}"
        self._connection.delete(path=endpoint, expected_status=204)

    def validate(self) -> None:
        """Not implemented: always raises ``NotImplementedError``."""
        raise NotImplementedError
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/rest/userfile.html b/_modules/openeo/rest/userfile.html new file mode 100644 index 000000000..8af1c5fd5 --- /dev/null +++ b/_modules/openeo/rest/userfile.html @@ -0,0 +1,242 @@ + + + + + + + openeo.rest.userfile — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.rest.userfile

+from __future__ import annotations
+
+import typing
+from pathlib import Path, PurePosixPath
+from typing import Any, Dict, Optional, Union
+
+from openeo.rest import DEFAULT_DOWNLOAD_CHUNK_SIZE
+from openeo.util import ensure_dir
+
+if typing.TYPE_CHECKING:
+    # Imports for type checking only (circular import issue at runtime).
+    from openeo.rest.connection import Connection
+
+
+
class UserFile:
    """
    Handle to a (user-uploaded) file in the user workspace on an openEO back-end.
    """

    def __init__(
        self,
        path: Union[str, PurePosixPath, None],
        *,
        connection: Connection,
        metadata: Optional[dict] = None,
    ):
        if not path:
            # No explicit path: fall back on the "path" field of the metadata (if any).
            path = metadata.get("path") if metadata else None
            if not path:
                raise ValueError(
                    "File path should be specified through `path` or `metadata` argument."
                )

        self.path = PurePosixPath(path)
        self.metadata = metadata or {"path": path}
        self.connection = connection

    @classmethod
    def from_metadata(cls, metadata: dict, connection: Connection) -> UserFile:
        """Build :py:class:`UserFile` from a workspace file metadata dictionary."""
        return cls(path=None, connection=connection, metadata=metadata)

    def __repr__(self):
        return f"<{self.__class__.__name__} file={self.path!r}>"

    def _get_endpoint(self) -> str:
        """Path of the ``/files/...`` endpoint for this file."""
        return f"/files/{self.path!s}"

    def download(self, target: Union[Path, str] = None) -> Path:
        """
        Downloads a user-uploaded file from the user workspace on the back-end
        locally to the given location.

        :param target: local download target path. Can be an existing folder
            (in which case the file name advertised by backend will be used)
            or full file name. By default, the working directory will be used.
        :return: path of the local file that was written
        """
        # The request is issued first, before the local target is resolved.
        response = self.connection.get(
            self._get_endpoint(), expected_status=200, stream=True
        )

        destination = Path(target or Path.cwd())
        if destination.is_dir():
            destination = destination / self.path.name
        ensure_dir(destination.parent)

        with destination.open(mode="wb") as out:
            for block in response.iter_content(chunk_size=DEFAULT_DOWNLOAD_CHUNK_SIZE):
                out.write(block)

        return destination

    def upload(self, source: Union[Path, str]) -> UserFile:
        """
        Uploads a local file to the path corresponding to this :py:class:`UserFile` in the user workspace
        and returns new :py:class:`UserFile` of newly uploaded file.

        .. tip::
            Usually you'll just need
            :py:meth:`Connection.upload_file() <openeo.rest.connection.Connection.upload_file>`
            instead of this :py:class:`UserFile` method.

        If the file exists in the user workspace it will be replaced.

        :param source: A path to a file on the local file system to upload.
        :return: new :py:class:`UserFile` instance of the newly uploaded file
        """
        return self.connection.upload_file(source, target=self.path)

    def delete(self):
        """Delete the user-uploaded file from the user workspace on the back-end."""
        endpoint = self._get_endpoint()
        self.connection.delete(endpoint, expected_status=204)

    def to_dict(self) -> Dict[str, Any]:
        """Returns the provided metadata as dict."""
        # This is used in internal/jupyter.py to detect and get the original metadata.
        # TODO: make this more explicit with an internal API?
        return self.metadata
+
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/rest/vectorcube.html b/_modules/openeo/rest/vectorcube.html new file mode 100644 index 000000000..0158f4411 --- /dev/null +++ b/_modules/openeo/rest/vectorcube.html @@ -0,0 +1,799 @@ + + + + + + + openeo.rest.vectorcube — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.rest.vectorcube

+from __future__ import annotations
+
+import json
+import pathlib
+import typing
+from typing import Callable, List, Optional, Tuple, Union
+
+import shapely.geometry.base
+
+import openeo.rest.datacube
+from openeo.api.process import Parameter
+from openeo.internal.documentation import openeo_process
+from openeo.internal.graph_building import PGNode
+from openeo.internal.warnings import legacy_alias
+from openeo.metadata import CollectionMetadata, CubeMetadata, Dimension
+from openeo.rest._datacube import (
+    THIS,
+    UDF,
+    _ensure_save_result,
+    _ProcessGraphAbstraction,
+    build_child_callback,
+)
+from openeo.rest.job import BatchJob
+from openeo.rest.mlmodel import MlModel
+from openeo.util import InvalidBBoxException, dict_no_none, guess_format, to_bbox_dict
+
+if typing.TYPE_CHECKING:
+    # Imports for type checking only (circular import issue at runtime).
+    from openeo import Connection
+
+
+
class VectorCube(_ProcessGraphAbstraction):
    """
    A Vector Cube, or 'Vector Collection' is a data structure containing 'Features':
    https://www.w3.org/TR/sdw-bp/#dfn-feature

    The features in this cube are restricted to have a geometry. Geometries can be points, lines, polygons etcetera.
    A geometry is specified in a 'coordinate reference system'. https://www.w3.org/TR/sdw-bp/#dfn-coordinate-reference-system-(crs)
    """

    # Output format used when none is specified or can be guessed.
    _DEFAULT_VECTOR_FORMAT = "GeoJSON"

    def __init__(self, graph: PGNode, connection: Union[Connection, None], metadata: Optional[CubeMetadata] = None):
        super().__init__(pgnode=graph, connection=connection)
        self.metadata = metadata

    @classmethod
    def _build_metadata(cls, add_properties: bool = False) -> CollectionMetadata:
        """Helper to build a (minimal) `CollectionMetadata` object."""
        # Vector cubes have at least a "geometry" dimension
        geometry_dimension = Dimension(name="geometry", type="geometry")
        if add_properties:
            dimensions = [geometry_dimension, Dimension(name="properties", type="other")]
        else:
            dimensions = [geometry_dimension]
        # TODO #464: use a more generic metadata container than "collection" metadata
        return CollectionMetadata(metadata={}, dimensions=dimensions)
def process(
    self,
    process_id: str,
    arguments: dict = None,
    metadata: Optional[CollectionMetadata] = None,
    namespace: Optional[str] = None,
    **kwargs,
) -> VectorCube:
    """
    Generic helper to create a new VectorCube by applying a process.

    :param process_id: process id of the process.
    :param arguments: argument dictionary for the process.
    :param metadata: metadata for the new cube; when not given (or falsy), the metadata of this cube is reused.
    :param namespace: optional process namespace, passed on when building the process graph node.
    :param kwargs: additional keyword arguments passed on to ``_build_pgnode``
        (presumably extra process arguments -- verify against ``_build_pgnode``).
    :return: new VectorCube instance
    """
    node = self._build_pgnode(process_id=process_id, arguments=arguments, namespace=namespace, **kwargs)
    cube_metadata = metadata or self.metadata
    return VectorCube(graph=node, connection=self._connection, metadata=cube_metadata)
+ + +
@classmethod
@openeo_process
def load_geojson(
    cls,
    connection: Connection,
    data: Union[dict, str, pathlib.Path, shapely.geometry.base.BaseGeometry, Parameter],
    properties: Optional[List[str]] = None,
) -> VectorCube:
    """
    Converts GeoJSON data as defined by RFC 7946 into a vector data cube.

    :param connection: the connection to use to connect with the openEO back-end.
    :param data: the geometry to load. One of:

        - GeoJSON-style data structure: e.g. a dictionary with ``"type": "Polygon"`` and ``"coordinates"`` fields
        - a path to a local GeoJSON file
        - a GeoJSON string
        - a shapely geometry object
        - a :py:class:`Parameter` (passed on as-is)

    :param properties: A list of properties from the GeoJSON file to construct an additional dimension from.
    :return: new VectorCube instance
    :raises ValueError: when ``data`` is of an unsupported type

    .. warning:: EXPERIMENTAL: this process is experimental with the potential for major things to change.

    .. versionadded:: 0.22.0
    """
    # TODO: unify with `DataCube._get_geometry_argument`
    # TODO #457 also support client side fetching of GeoJSON from URL?
    looks_like_json_dump = isinstance(data, str) and data.strip().startswith("{")
    if looks_like_json_dump:
        geometry = json.loads(data)
    elif isinstance(data, (str, pathlib.Path)):
        # Any other string (or a path object) is assumed to point to a local file.
        geojson_path = pathlib.Path(data)
        with geojson_path.open(mode="r", encoding="utf-8") as fh:
            geometry = json.load(fh)
        assert isinstance(geometry, dict)
    elif isinstance(data, shapely.geometry.base.BaseGeometry):
        geometry = shapely.geometry.mapping(data)
    elif isinstance(data, (Parameter, dict)):
        # Parameters and GeoJSON-style dicts are used as-is.
        geometry = data
    else:
        raise ValueError(data)
    # TODO #457 client side verification of GeoJSON construct: valid type, valid structure, presence of CRS, ...?

    node = PGNode(process_id="load_geojson", data=geometry, properties=properties or [])
    # TODO #457 always a "properties" dimension? https://github.com/Open-EO/openeo-processes/issues/448
    return cls(graph=node, connection=connection, metadata=cls._build_metadata(add_properties=True))
+ + +
@classmethod
@openeo_process
def load_url(cls, connection: Connection, url: str, format: str, options: Optional[dict] = None) -> VectorCube:
    """
    Loads a file from a URL

    :param connection: the connection to use to connect with the openEO back-end.
    :param url: The URL to read from. Authentication details such as API keys or tokens may need to be included in the URL.
    :param format: The file format to use when loading the data.
    :param options: The file format parameters to use when reading the data.
        Must correspond to the parameters that the server reports as supported parameters for the chosen ``format``
    :return: new VectorCube instance

    .. warning:: EXPERIMENTAL: this process is experimental with the potential for major things to change.

    .. versionadded:: 0.22.0
    """
    # Arguments that are `None` (e.g. unspecified `options`) are left out of the process node.
    load_arguments = dict_no_none(url=url, format=format, options=options)
    node = PGNode(process_id="load_url", arguments=load_arguments)
    # TODO #457 always a "properties" dimension? https://github.com/Open-EO/openeo-processes/issues/448
    return cls(graph=node, connection=connection, metadata=cls._build_metadata(add_properties=True))
+ + +
@openeo_process
def run_udf(
    self,
    udf: Union[str, UDF],
    runtime: Optional[str] = None,
    version: Optional[str] = None,
    context: Optional[dict] = None,
) -> VectorCube:
    """
    Run a UDF on the vector cube.

    It is recommended to provide the UDF just as :py:class:`UDF <openeo.rest._datacube.UDF>` instance.
    (the other arguments could be used to override UDF parameters if necessary).

    :param udf: UDF code as a string or :py:class:`UDF <openeo.rest._datacube.UDF>` instance
    :param runtime: UDF runtime (required when ``udf`` is given as plain string)
    :param version: UDF version
    :param context: UDF context
    :return: new VectorCube instance
    :raises ValueError: when ``udf`` is a plain string and no ``runtime`` is given

    .. warning:: EXPERIMENTAL: not generally supported, API subject to change.

    .. versionadded:: 0.10.0

    .. versionchanged:: 0.16.0
        Added support to pass self-contained :py:class:`UDF <openeo.rest._datacube.UDF>` instance.
    """
    if not isinstance(udf, UDF):
        if not runtime:
            raise ValueError("Argument `runtime` must be specified")
    else:
        # `UDF` instance is preferred usage pattern, but allow overriding.
        version = version or udf.version
        context = context or udf.context
        runtime = runtime or udf.get_runtime(connection=self.connection)
        udf = udf.code

    optional_arguments = dict_no_none({"version": version, "context": context})
    return self.process(
        process_id="run_udf",
        data=self,
        udf=udf,
        runtime=runtime,
        arguments=optional_arguments,
    )
+ + +
@openeo_process
def save_result(self, format: Union[str, None] = "GeoJSON", options: dict = None):
    """
    Add a ``save_result`` process on top of this vector cube.

    :param format: output format; falls back to ``"GeoJSON"`` when empty or ``None``.
    :param options: output format options; an empty dict is used when not specified.
    :return: result of :py:meth:`process` for the ``save_result`` node
    """
    # TODO #401: guard against duplicate save_result nodes?
    save_arguments = {
        "data": self,
        "format": format or "GeoJSON",
        "options": options or {},
    }
    return self.process(process_id="save_result", arguments=save_arguments)
+ + +
def execute(self, *, validate: Optional[bool] = None) -> dict:
    """
    Executes the process graph.

    :param validate: passed on as-is to the connection's ``execute()``
    :return: the result of the connection's ``execute()`` call
    """
    flat_graph = self.flat_graph()
    return self._connection.execute(flat_graph, validate=validate)
+ + +
def download(
    self,
    outputfile: Optional[Union[str, pathlib.Path]] = None,
    format: Optional[str] = None,
    options: Optional[dict] = None,
    *,
    validate: Optional[bool] = None,
    auto_add_save_result: bool = True,
) -> Union[None, bytes]:
    """
    Execute synchronously and download the vector cube.

    The result will be stored to the output path, when specified.
    If no output path (or ``None``) is given, the raw download content will be returned as ``bytes`` object.

    :param outputfile: (optional) output file to store the result to
    :param format: (optional) output format to use.
    :param options: (optional) additional output format options.
    :param validate: Optional toggle to enable/prevent validation of the process graphs before execution
        (overruling the connection's ``auto_validate`` setting).
    :param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet.

    .. versionchanged:: 0.21.0
        When not specified explicitly, output format is guessed from output file extension.

    .. versionchanged:: 0.32.0
        Added ``auto_add_save_result`` option
    """
    # TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ...
    cube = self
    if auto_add_save_result:
        # Only guess a format from the file name when there is a file name.
        guessed_format = guess_format(outputfile) if outputfile else None
        cube = _ensure_save_result(
            cube=cube,
            format=format,
            options=options,
            weak_format=guessed_format,
            default_format=self._DEFAULT_VECTOR_FORMAT,
            method="VectorCube.download()",
        )
    flat_graph = cube.flat_graph()
    return self._connection.download(flat_graph, outputfile=outputfile, validate=validate)
+ + +
def execute_batch(
    self,
    outputfile: Optional[Union[str, pathlib.Path]] = None,
    out_format: Optional[str] = None,
    *,
    title: Optional[str] = None,
    description: Optional[str] = None,
    plan: Optional[str] = None,
    budget: Optional[float] = None,
    print=print,
    max_poll_interval: float = 60,
    connection_retry_interval: float = 30,
    additional: Optional[dict] = None,
    job_options: Optional[dict] = None,
    validate: Optional[bool] = None,
    auto_add_save_result: bool = True,
    # TODO: avoid using kwargs as format options
    **format_options,
) -> BatchJob:
    """
    Evaluate the process graph by creating a batch job, and retrieving the results when it is finished.
    This method is mostly recommended if the batch job is expected to run in a reasonable amount of time.

    For very long running jobs, you probably do not want to keep the client running.

    :param outputfile: The path of a file to which a result can be written
    :param out_format: (optional) output format to use.
    :param title: job title
    :param description: job description
    :param plan: The billing plan to process and charge the job with
    :param budget: Maximum budget to be spent on executing the job.
        Note that some backends do not honor this limit.
    :param print: callable passed on as ``print`` to the job's ``run_synchronous()``
    :param max_poll_interval: passed on as-is to the job's ``run_synchronous()``
    :param connection_retry_interval: passed on as-is to the job's ``run_synchronous()``
    :param additional: additional (top-level) properties to set in the request body
    :param job_options: dictionary of job options to pass to the backend
        (under top-level property "job_options")
    :param validate: Optional toggle to enable/prevent validation of the process graphs before execution
        (overruling the connection's ``auto_validate`` setting).
    :param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet.
    :param format_options: (optional) additional output format options

    .. versionchanged:: 0.21.0
        When not specified explicitly, output format is guessed from output file extension.

    .. versionchanged:: 0.32.0
        Added ``auto_add_save_result`` option

    .. versionadded:: 0.36.0
        Added argument ``additional``.
    """
    cube = self
    if auto_add_save_result:
        # Only guess a format from the file name when there is a file name.
        guessed_format = guess_format(outputfile) if outputfile else None
        cube = _ensure_save_result(
            cube=cube,
            format=out_format,
            options=format_options,
            weak_format=guessed_format,
            default_format=self._DEFAULT_VECTOR_FORMAT,
            method="VectorCube.execute_batch()",
        )
    # `save_result` handling is already done above, so it is disabled in `create_job`.
    batch_job = cube.create_job(
        title=title,
        description=description,
        plan=plan,
        budget=budget,
        additional=additional,
        job_options=job_options,
        validate=validate,
        auto_add_save_result=False,
    )
    # TODO #135 support multi file result sets too
    return batch_job.run_synchronous(
        outputfile=outputfile,
        print=print,
        max_poll_interval=max_poll_interval,
        connection_retry_interval=connection_retry_interval,
    )
+ + +
def create_job(
    self,
    out_format: Optional[str] = None,
    *,
    title: Optional[str] = None,
    description: Optional[str] = None,
    plan: Optional[str] = None,
    budget: Optional[float] = None,
    additional: Optional[dict] = None,
    job_options: Optional[dict] = None,
    validate: Optional[bool] = None,
    auto_add_save_result: bool = True,
    **format_options,
) -> BatchJob:
    """
    Create a batch job on the backend for this vector cube's process graph.

    :param out_format: String Format of the job result.
    :param title: job title
    :param description: job description
    :param plan: The billing plan to process and charge the job with
    :param budget: Maximum budget to be spent on executing the job.
        Note that some backends do not honor this limit.
    :param additional: additional (top-level) properties to set in the request body
    :param job_options: dictionary of job options to pass to the backend
        (under top-level property "job_options")
    :param format_options: String Parameters for the job result format
    :param validate: Optional toggle to enable/prevent validation of the process graphs before execution
        (overruling the connection's ``auto_validate`` setting).
    :param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet.

    :return: Created job.

    .. versionchanged:: 0.32.0
        Added ``auto_add_save_result`` option
    """
    # TODO: avoid using all kwargs as format_options
    # TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ...
    cube = self
    if auto_add_save_result:
        cube = _ensure_save_result(
            cube=cube,
            format=out_format,
            # No format options given: pass `None` instead of an empty dict.
            options=format_options or None,
            default_format=self._DEFAULT_VECTOR_FORMAT,
            method="VectorCube.create_job()",
        )
    flat_graph = cube.flat_graph()
    return self._connection.create_job(
        process_graph=flat_graph,
        title=title,
        description=description,
        plan=plan,
        budget=budget,
        additional=additional,
        job_options=job_options,
        validate=validate,
    )
# Legacy alias: `send_job` is kept as the old name of `create_job` (registered through `legacy_alias`).
send_job = legacy_alias(create_job, name="send_job", since="0.10.0")
@openeo_process
def filter_bands(self, bands: List[str]) -> VectorCube:
    """
    Apply the ``filter_bands`` process to this vector cube.

    :param bands: list of bands, passed as the ``bands`` argument of the process
    :return: new VectorCube instance

    .. versionadded:: 0.22.0
    """
    # TODO #459 docs
    filter_arguments = {"data": THIS, "bands": bands}
    return self.process(process_id="filter_bands", arguments=filter_arguments)
+ + +
@openeo_process
def filter_bbox(
    self,
    *,
    west: Optional[float] = None,
    south: Optional[float] = None,
    east: Optional[float] = None,
    north: Optional[float] = None,
    extent: Optional[Union[dict, List[float], Tuple[float, float, float, float], Parameter]] = None,
    crs: Optional[int] = None,
) -> VectorCube:
    """
    Apply the ``filter_bbox`` process to this vector cube.

    The bounding box is specified either through the separate
    ``west``/``south``/``east``/``north`` arguments or through ``extent``, but not both.

    :param west: west bound of the bounding box
    :param south: south bound of the bounding box
    :param east: east bound of the bounding box
    :param north: north bound of the bounding box
    :param extent: the bounding box as a whole: a :py:class:`Parameter` (used as-is)
        or a value that is converted with ``to_bbox_dict``
    :param crs: passed to ``to_bbox_dict`` when the extent is converted
    :return: new VectorCube instance
    :raises InvalidBBoxException: when both ``extent`` and any of ``west``/``south``/``east``/``north`` are given

    .. versionadded:: 0.22.0
    """
    # TODO #459 docs
    separate_bounds = (west, south, east, north)
    if any(bound is not None for bound in separate_bounds):
        if extent is not None:
            raise InvalidBBoxException("Don't specify both west/south/east/north and extent")
        extent = dict_no_none(west=west, south=south, east=east, north=north)

    # A `Parameter` is passed through untouched; anything else is normalized.
    if not isinstance(extent, Parameter):
        extent = to_bbox_dict(extent, crs=crs)
    filter_arguments = {"data": THIS, "extent": extent}
    return self.process(process_id="filter_bbox", arguments=filter_arguments)
+ + +
@openeo_process
def filter_labels(
    self, condition: Union[PGNode, Callable], dimension: str, context: Optional[dict] = None
) -> VectorCube:
    """
    Filters the dimension labels in the data cube for the given dimension.
    Only the dimension labels that match the specified condition are preserved,
    all other labels with their corresponding data get removed.

    :param condition: the "child callback" which will be given a single label value (number or string)
        and returns a boolean expressing if the label should be preserved.
        Also see :ref:`callbackfunctions`.
    :param dimension: The name of the dimension to filter on.
    :param context: optional ``context`` argument of the process (left out when ``None``).
    :return: new VectorCube instance

    .. versionadded:: 0.22.0
    """
    callback = build_child_callback(condition, parent_parameters=["value"])
    filter_arguments = dict_no_none(data=THIS, condition=callback, dimension=dimension, context=context)
    return self.process(process_id="filter_labels", arguments=filter_arguments)
+ + +
@openeo_process
def filter_vector(
    self, geometries: Union["VectorCube", shapely.geometry.base.BaseGeometry, dict], relation: str = "intersects"
) -> VectorCube:
    """
    Apply the ``filter_vector`` process to this vector cube.

    :param geometries: geometries to filter with: a :py:class:`VectorCube` or
        :py:class:`Parameter` is used as-is, anything else is first loaded
        with :py:meth:`load_geojson`.
    :param relation: passed as the ``relation`` argument of the process
    :return: new VectorCube instance

    .. versionadded:: 0.22.0
    """
    # TODO #459 docs
    already_usable = isinstance(geometries, (VectorCube, Parameter))
    if not already_usable:
        geometries = self.load_geojson(connection=self.connection, data=geometries)
    filter_arguments = {"data": THIS, "geometries": geometries, "relation": relation}
    return self.process(process_id="filter_vector", arguments=filter_arguments)
+ + +
@openeo_process
def fit_class_random_forest(
    self,
    # TODO #279 #293: target type should be `VectorCube` (with adapters for GeoJSON FeatureCollection, GeoPandas, ...)
    target: dict,
    # TODO #293 max_variables officially has no default
    max_variables: Optional[int] = None,
    num_trees: int = 100,
    seed: Optional[int] = None,
) -> MlModel:
    """
    Executes the fit of a random forest classification based on the user input of target and predictors.
    The Random Forest classification model is based on the approach by Breiman (2001).

    .. warning:: EXPERIMENTAL: not generally supported, API subject to change.

    :param target: The training sites for the classification model as a vector data cube. This is associated with the target
        variable for the Random Forest model. The geometry has to be associated with a value to predict (e.g. fractional
        forest canopy cover).
    :param max_variables: Specifies how many split variables will be used at a node. Default value is `null`, which corresponds to the
        number of predictors divided by 3.
    :param num_trees: The number of trees build within the Random Forest classification.
    :param seed: A randomization seed to use for the random sampling in training.
    :return: :py:class:`MlModel` wrapping the ``fit_class_random_forest`` process graph node

    .. versionadded:: 0.16.0
        Originally added in version 0.10.0 as :py:class:`DataCube <openeo.rest.datacube.DataCube>` method,
        but moved to :py:class:`VectorCube` in version 0.16.0.
    """
    fit_arguments = dict_no_none(
        predictors=self,
        # TODO #279 strictly per-spec, target should be a `vector-cube`, but due to lack of proper support we are limited to inline GeoJSON for now
        target=target,
        max_variables=max_variables,
        num_trees=num_trees,
        seed=seed,
    )
    node = PGNode(process_id="fit_class_random_forest", arguments=fit_arguments)
    return MlModel(graph=node, connection=self._connection)
+ + +
@openeo_process
def fit_regr_random_forest(
    self,
    # TODO #279 #293: target type should be `VectorCube` (with adapters for GeoJSON FeatureCollection, GeoPandas, ...)
    target: dict,
    # TODO #293 max_variables officially has no default
    max_variables: Optional[int] = None,
    num_trees: int = 100,
    seed: Optional[int] = None,
) -> MlModel:
    """
    Executes the fit of a random forest regression based on training data.
    The Random Forest regression model is based on the approach by Breiman (2001).

    .. warning:: EXPERIMENTAL: not generally supported, API subject to change.

    :param target: The training sites for the regression model as a vector data cube.
        This is associated with the target variable for the Random Forest model.
        The geometry has to be associated with a value to predict (e.g. fractional forest canopy cover).
    :param max_variables: Specifies how many split variables will be used at a node. Default value is `null`, which corresponds to the
        number of predictors divided by 3.
    :param num_trees: The number of trees build within the Random Forest regression.
    :param seed: A randomization seed to use for the random sampling in training.
    :return: :py:class:`MlModel` wrapping the ``fit_regr_random_forest`` process graph node

    .. versionadded:: 0.16.0
        Originally added in version 0.10.0 as :py:class:`DataCube <openeo.rest.datacube.DataCube>` method,
        but moved to :py:class:`VectorCube` in version 0.16.0.
    """
    fit_arguments = dict_no_none(
        predictors=self,
        # TODO #279 strictly per-spec, target should be a `vector-cube`, but due to lack of proper support we are limited to inline GeoJSON for now
        target=target,
        max_variables=max_variables,
        num_trees=num_trees,
        seed=seed,
    )
    node = PGNode(process_id="fit_regr_random_forest", arguments=fit_arguments)
    return MlModel(graph=node, connection=self._connection)
+ + +
+[docs] + @openeo_process + def apply_dimension( + self, + process: Union[str, typing.Callable, UDF, PGNode], + dimension: str, + target_dimension: Optional[str] = None, + context: Optional[dict] = None, + ) -> VectorCube: + """ + Applies a process to all values along a dimension of a data cube. + For example, if the temporal dimension is specified the process will work on the values of a time series. + + The process to apply is specified by providing a callback function in the `process` argument. + + :param process: the "child callback": + the name of a single process, + or a callback function as discussed in :ref:`callbackfunctions`, + or a :py:class:`UDF <openeo.rest._datacube.UDF>` instance. + + The callback should correspond to a process that + receives an array of numerical values + and returns an array of numerical values. + For example: + + - ``"sort"`` (string) + - :py:func:`sort <openeo.processes.sort>` (:ref:`predefined openEO process function <openeo_processes_functions>`) + - ``lambda data: data.concat([42, -3])`` (function or lambda) + + + :param dimension: The name of the source dimension to apply the process on. Fails with a DimensionNotAvailable error if the specified dimension does not exist. + :param target_dimension: The name of the target dimension or null (the default) to use the source dimension + specified in the parameter dimension. By specifying a target dimension, the source dimension is removed. + The target dimension with the specified name and the type other (see add_dimension) is created, if it doesn't exist yet. + :param context: Additional data to be passed to the process. + + :return: A vector cube with the given process applied along the given dimension. + :raises: DimensionNotAvailable + + .. 
versionadded:: 0.22.0 + """ + process = build_child_callback( + process=process, parent_parameters=["data", "context"], connection=self.connection + ) + arguments = dict_no_none( + { + "data": THIS, + "process": process, + "dimension": dimension, + "target_dimension": target_dimension, + "context": context, + } + ) + return self.process(process_id="apply_dimension", arguments=arguments)
+ + +
+[docs] + def vector_to_raster(self, target: openeo.rest.datacube.DataCube) -> openeo.rest.datacube.DataCube: + """ + Converts this vector cube (:py:class:`VectorCube`) into a raster data cube (:py:class:`~openeo.rest.datacube.DataCube`). + The bounding polygon of homogeneous areas of pixels is constructed. + + :param target: a reference raster data cube to adopt the CRS/projection/resolution from. + + .. warning:: ``vector_to_raster`` is an experimental, non-standard process. It is not widely supported, and its API is subject to change. + + .. versionadded:: 0.28.0 + + """ + # TODO: this parameter sniffing is a temporary workaround until + # the `target` parameter name rename has fully settled + # https://github.com/Open-EO/openeo-python-driver/issues/274 + # After that has settled, it is still useful to verify assumptions about this non-standard process. + try: + process_spec = self.connection.describe_process("vector_to_raster") + target_parameter = process_spec["parameters"][1]["name"] + assert "target" in target_parameter + except Exception: + target_parameter = "target" + + pg_node = PGNode( + process_id="vector_to_raster", + arguments={"data": self, target_parameter: target}, + ) + # TODO: the correct metadata has to be passed here: + # replace "geometry" dimension with spatial dimensions of the target cube + return openeo.rest.datacube.DataCube(pg_node, connection=self._connection, metadata=self.metadata)
+
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/testing.html b/_modules/openeo/testing.html new file mode 100644 index 000000000..eac8db416 --- /dev/null +++ b/_modules/openeo/testing.html @@ -0,0 +1,170 @@ + + + + + + + openeo.testing — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.testing

+"""
+Utilities for testing of openEO client workflows.
+"""
+
+import json
+from pathlib import Path
+from typing import Callable, Optional, Union
+
+
+
+[docs] +class TestDataLoader: + """ + Helper to resolve paths to test data files, load them as JSON, optionally preprocess them, etc. + + It's intended to be used as a pytest fixture, e.g. from ``conftest.py``: + + .. code-block:: python + + @pytest.fixture + def test_data() -> TestDataLoader: + return TestDataLoader(root=Path(__file__).parent / "data") + + .. versionadded:: 0.30.0 + """ + + def __init__(self, root: Union[str, Path]): + self.data_root = Path(root) + +
+[docs] + def get_path(self, filename: Union[str, Path]) -> Path: + """Get path to a test data file (``filename`` joined onto the data root; only absolute if the root is)""" + return self.data_root / filename
+ + +
+[docs] + def load_json(self, filename: Union[str, Path], preprocess: Optional[Callable[[str], str]] = None) -> dict: + """Parse data from a test JSON file, optionally passing the raw file text through ``preprocess`` before parsing""" + data = self.get_path(filename).read_text(encoding="utf8") + if preprocess: + data = preprocess(data) + return json.loads(data)
+
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/testing/results.html b/_modules/openeo/testing/results.html new file mode 100644 index 000000000..10b222cf7 --- /dev/null +++ b/_modules/openeo/testing/results.html @@ -0,0 +1,524 @@ + + + + + + + openeo.testing.results — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.testing.results

+"""
+Assert functions for comparing actual (batch job) results against expected reference data.
+"""
+
+import json
+import logging
+import tempfile
+from pathlib import Path
+from typing import List, Optional, Union
+
+import xarray
+import xarray.testing
+
+from openeo.rest.job import DEFAULT_JOB_RESULTS_FILENAME, BatchJob, JobResults
+from openeo.util import repr_truncate
+
+_log = logging.getLogger(__name__)
+
+
+_DEFAULT_RTOL = 1e-6
+_DEFAULT_ATOL = 1e-6
+
+
+def _load_xarray_netcdf(path: Union[str, Path], **kwargs) -> xarray.Dataset:
+    """
+    Load a netCDF file as Xarray Dataset
+    """
+    _log.debug(f"_load_xarray_netcdf: {path!r}")
+    return xarray.load_dataset(path, **kwargs)
+
+
+def _load_rioxarray_geotiff(path: Union[str, Path], **kwargs) -> xarray.DataArray:
+    """
+    Load a GeoTIFF file as Xarray DataArray (using `rioxarray` extension).
+    """
+    _log.debug(f"_load_rioxarray_geotiff: {path!r}")
+    try:
+        import rioxarray
+    except ImportError as e:
+        raise ImportError("This feature requires 'rioxarray` as optional dependency.") from e
+    return rioxarray.open_rasterio(path, **kwargs)
+
+
+def _load_xarray(path: Union[str, Path], **kwargs) -> Union[xarray.Dataset, xarray.DataArray]:
+    """
+    Generically load a netCDF/GeoTIFF file as Xarray Dataset/DataArray.
+    """
+    path = Path(path)
+    if path.suffix.lower() in {".nc", ".netcdf"}:
+        return _load_xarray_netcdf(path, **kwargs)
+    elif path.suffix.lower() in {".tif", ".tiff", ".gtiff", ".geotiff"}:
+        return _load_rioxarray_geotiff(path, **kwargs)
+    raise ValueError(f"Unsupported file type: {path}")
+
+
+def _load_json(path: Union[str, Path]) -> dict:
+    """
+    Load a JSON file.
+    """
+    with Path(path).open("r", encoding="utf-8") as f:
+        return json.load(f)
+
+
+def _as_xarray_dataset(data: Union[str, Path, xarray.Dataset]) -> xarray.Dataset:
+    """
+    Get data as Xarray Dataset (loading from file if needed).
+    """
+    if isinstance(data, (str, Path)):
+        data = _load_xarray(data)
+    # TODO auto-convert DataArray to Dataset?
+    if not isinstance(data, xarray.Dataset):
+        raise ValueError(f"Unsupported type: {type(data)}")
+    return data
+
+
+def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.DataArray:
+    """
+    Convert a path to a NetCDF/GeoTIFF file to an Xarray DataArray.
+
+    :param data: path to a NetCDF/GeoTIFF file or Xarray DataArray
+    :return: Xarray DataArray
+    """
+    if isinstance(data, (str, Path)):
+        data = _load_xarray(data)
+    # TODO: auto-convert Dataset to DataArray?
+    if not isinstance(data, xarray.DataArray):
+        raise ValueError(f"Unsupported type: {type(data)}")
+    return data
+
+
+def _compare_xarray_dataarray(
+    actual: Union[xarray.DataArray, str, Path],
+    expected: Union[xarray.DataArray, str, Path],
+    *,
+    rtol: float = _DEFAULT_RTOL,
+    atol: float = _DEFAULT_ATOL,
+) -> List[str]:
+    """
+    Compare two xarray DataArrays with tolerance and report mismatch issues (as strings)
+
+    Checks that are done (with tolerance):
+    - (optional) Check fraction of mismatching pixels (difference exceeding some tolerance).
+      If fraction is below a given threshold, ignore these mismatches in subsequent comparisons.
+      If fraction is above the threshold, report this issue.
+    - Compare actual and expected data with `xarray.testing.assert_allclose` and specified tolerances.
+
+    :return: list of issues (empty if no issues)
+    """
+    # TODO: make this a public function?
+    # TODO: option for nodata fill value?
+    # TODO: option to include data type check?
+    # TODO: option to cast to some data type (or even rescale) before comparison?
+    # TODO: also compare attributes of the DataArray?
+    actual = _as_xarray_dataarray(actual)
+    expected = _as_xarray_dataarray(expected)
+    issues = []
+
+    # `xarray.testing.assert_allclose` currently does not always
+    # provides detailed information about shape/dimension mismatches
+    # so we enrich the issue listing with some more details
+    if actual.dims != expected.dims:
+        issues.append(f"Dimension mismatch: {actual.dims} != {expected.dims}")
+    for dim in sorted(set(expected.dims).intersection(actual.dims)):
+        acs = actual.coords[dim].values
+        ecs = expected.coords[dim].values
+        if not (acs.shape == ecs.shape and (acs == ecs).all()):
+            issues.append(f"Coordinates mismatch for dimension {dim!r}: {acs} != {ecs}")
+    if actual.shape != expected.shape:
+        issues.append(f"Shape mismatch: {actual.shape} != {expected.shape}")
+
+    try:
+        xarray.testing.assert_allclose(a=actual, b=expected, rtol=rtol, atol=atol)
+    except AssertionError as e:
+        # TODO: message of `assert_allclose` is typically multiline, split it again or make it one line?
+        issues.append(str(e).strip())
+
+    return issues
+
+
+
+[docs] +def assert_xarray_dataarray_allclose( + actual: Union[xarray.DataArray, str, Path], + expected: Union[xarray.DataArray, str, Path], + *, + rtol: float = _DEFAULT_RTOL, + atol: float = _DEFAULT_ATOL, +): + """ + Assert that two Xarray ``DataArray`` instances are equal (with tolerance). + + :param actual: actual data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file. + :param expected: expected or reference data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file. + :param rtol: relative tolerance + :param atol: absolute tolerance + :raises AssertionError: if not equal within the given tolerance (the message lists all detected issues, one per line) + + .. versionadded:: 0.31.0 + + .. warning:: + This function is experimental and subject to change. + """ + issues = _compare_xarray_dataarray(actual=actual, expected=expected, rtol=rtol, atol=atol) + if issues: + raise AssertionError("\n".join(issues))
+ + + +def _compare_xarray_datasets( + actual: Union[xarray.Dataset, str, Path], + expected: Union[xarray.Dataset, str, Path], + *, + rtol: float = _DEFAULT_RTOL, + atol: float = _DEFAULT_ATOL, +) -> List[str]: + """ + Compare two xarray ``Dataset`` objects with tolerance and report mismatch issues (as strings): + a mismatch in the sets of data variables, followed by the issues found per data variable both datasets have in common. + + :return: list of issues (empty if no issues) + """ + # TODO: make this a public function? + actual = _as_xarray_dataset(actual) + expected = _as_xarray_dataset(expected) + + all_issues = [] + # TODO: just leverage DataSet support in xarray.testing.assert_allclose for all this? + actual_vars = set(actual.data_vars) + expected_vars = set(expected.data_vars) + _log.debug(f"_compare_xarray_datasets: actual_vars={actual_vars!r} expected_vars={expected_vars!r}") + if actual_vars != expected_vars: + all_issues.append(f"Xarray DataSet variables mismatch: {actual_vars} != {expected_vars}") + for var in expected_vars.intersection(actual_vars): + _log.debug(f"_compare_xarray_datasets: comparing variable {var!r}") + issues = _compare_xarray_dataarray(actual[var], expected[var], rtol=rtol, atol=atol) + if issues: + all_issues.append(f"Issues for variable {var!r}:") + all_issues.extend(issues) + return all_issues + + +
+[docs] +def assert_xarray_dataset_allclose( + actual: Union[xarray.Dataset, str, Path], + expected: Union[xarray.Dataset, str, Path], + *, + rtol: float = _DEFAULT_RTOL, + atol: float = _DEFAULT_ATOL, +): + """ + Assert that two Xarray ``Dataset`` instances are equal (with tolerance). + + :param actual: actual data, provided as Xarray Dataset object or path to NetCDF/GeoTIFF file + :param expected: expected or reference data, provided as Xarray Dataset object or path to NetCDF/GeoTIFF file. + :param rtol: relative tolerance + :param atol: absolute tolerance + :raises AssertionError: if not equal within the given tolerance + + .. versionadded:: 0.31.0 + + .. warning:: + This function is experimental and subject to change. + """ + issues = _compare_xarray_datasets(actual=actual, expected=expected, rtol=rtol, atol=atol) + if issues: + raise AssertionError("\n".join(issues))
+ + + +
+[docs] +def assert_xarray_allclose( + actual: Union[xarray.Dataset, xarray.DataArray, str, Path], + expected: Union[xarray.Dataset, xarray.DataArray, str, Path], + *, + rtol: float = _DEFAULT_RTOL, + atol: float = _DEFAULT_ATOL, +): + """ + Assert that two Xarray ``Dataset`` or ``DataArray`` instances are equal (with tolerance). + Both inputs must be of the same kind (both ``Dataset`` or both ``DataArray``). + + :param actual: actual data, provided as Xarray object or path to NetCDF/GeoTIFF file. + :param expected: expected or reference data, provided as Xarray object or path to NetCDF/GeoTIFF file. + :param rtol: relative tolerance + :param atol: absolute tolerance + :raises AssertionError: if not equal within the given tolerance + :raises ValueError: if the (loaded) inputs are not both ``Dataset`` or both ``DataArray`` + + .. versionadded:: 0.31.0 + + .. warning:: + This function is experimental and subject to change. + """ + if isinstance(actual, (str, Path)): + actual = _load_xarray(actual) + if isinstance(expected, (str, Path)): + expected = _load_xarray(expected) + + if isinstance(actual, xarray.Dataset) and isinstance(expected, xarray.Dataset): + assert_xarray_dataset_allclose(actual, expected, rtol=rtol, atol=atol) + elif isinstance(actual, xarray.DataArray) and isinstance(expected, xarray.DataArray): + assert_xarray_dataarray_allclose(actual, expected, rtol=rtol, atol=atol) + else: + raise ValueError(f"Unsupported types: {type(actual)} and {type(expected)}")
+ + + +def _as_job_results_download( + job_results: Union[BatchJob, JobResults, str, Path], tmp_path: Optional[Path] = None +) -> Path: + """ + Produce a directory with downloaded job results assets and metadata. + + :param job_results: a batch job, job results metadata object or a path + :param tmp_path: root temp path to download results if needed + :return: path of the directory with the job results (a freshly created download directory, or the given path) + """ + # TODO: support download/copy from other sources (e.g. S3, ...) + if isinstance(job_results, BatchJob): + job_results = job_results.get_results() + if isinstance(job_results, JobResults): + download_dir = tempfile.mkdtemp(dir=tmp_path, prefix=job_results.get_job_id() + "-") + _log.info(f"Downloading results from job {job_results.get_job_id()} to {download_dir}") + job_results.download_files(target=download_dir) + job_results = download_dir + if isinstance(job_results, (str, Path)): + return Path(job_results) + else: + raise ValueError(f"Unsupported type: {type(job_results)}") + + +def _compare_job_results( + actual: Union[BatchJob, JobResults, str, Path], + expected: Union[BatchJob, JobResults, str, Path], + *, + rtol: float = _DEFAULT_RTOL, + atol: float = _DEFAULT_ATOL, + tmp_path: Optional[Path] = None, +) -> List[str]: + """ + Compare two job results sets (directories with downloaded assets and metadata, + e.g. 
as produced by ``JobResults.download_files()``) + + :return: list of issues (empty if no issues) + """ + actual_dir = _as_job_results_download(actual, tmp_path=tmp_path) + expected_dir = _as_job_results_download(expected, tmp_path=tmp_path) + _log.info(f"Comparing job results: {actual_dir!r} vs {expected_dir!r}") + + all_issues = [] + + actual_filenames = set(p.name for p in actual_dir.glob("*") if p.is_file()) + expected_filenames = set(p.name for p in expected_dir.glob("*") if p.is_file()) + if actual_filenames != expected_filenames: + all_issues.append(f"File set mismatch: {actual_filenames} != {expected_filenames}") + + for filename in expected_filenames.intersection(actual_filenames): + actual_path = actual_dir / filename + expected_path = expected_dir / filename + if filename == DEFAULT_JOB_RESULTS_FILENAME: + issues = _compare_job_result_metadata(actual=actual_path, expected=expected_path) + if issues: + all_issues.append(f"Issues for metadata file {filename!r}:") + all_issues.extend(issues) + elif expected_path.suffix.lower() in {".nc", ".netcdf"}: + issues = _compare_xarray_datasets(actual=actual_path, expected=expected_path, rtol=rtol, atol=atol) + if issues: + all_issues.append(f"Issues for file {filename!r}:") + all_issues.extend(issues) + elif expected_path.suffix.lower() in {".tif", ".tiff", ".gtiff", ".geotiff"}: + issues = _compare_xarray_dataarray(actual=actual_path, expected=expected_path, rtol=rtol, atol=atol) + if issues: + all_issues.append(f"Issues for file {filename!r}:") + all_issues.extend(issues) + else: + _log.warning(f"Unhandled job result asset {filename!r}") + + return all_issues + + +def _compare_job_result_metadata( + actual: Union[str, Path], + expected: Union[str, Path], +) -> List[str]: + """Compare the "derived_from" links of two job result metadata JSON files and return the differences as a list of issues (empty if no issues).""" + issues = [] + actual_metadata = _load_json(actual) + expected_metadata = _load_json(expected) + + # Check "derived_from" links + actual_derived_from = set(k["href"] for k in actual_metadata.get("links", []) if k["rel"] == "derived_from") + 
expected_derived_from = set(k["href"] for k in expected_metadata.get("links", []) if k["rel"] == "derived_from") + + if actual_derived_from != expected_derived_from: + actual_only = actual_derived_from - expected_derived_from + expected_only = expected_derived_from - actual_derived_from + common = actual_derived_from.intersection(expected_derived_from) + issues.append( + f"Differing 'derived_from' links ({len(common)} common, {len(actual_only)} only in actual, {len(expected_only)} only in expected):\n" + f" only in actual: {repr_truncate(actual_only, width=1000)}\n" + f" only in expected: {repr_truncate(expected_only, width=1000)}." + ) + + # TODO: more metadata checks (e.g. spatial and temporal extents)? + + return issues + + +
+[docs] +def assert_job_results_allclose( + actual: Union[BatchJob, JobResults, str, Path], + expected: Union[BatchJob, JobResults, str, Path], + *, + rtol: float = _DEFAULT_RTOL, + atol: float = _DEFAULT_ATOL, + tmp_path: Optional[Path] = None, +): + """ + Assert that two job results sets are equal (with tolerance). + + :param actual: actual job results, provided as :py:class:`~openeo.rest.job.BatchJob` object, + :py:class:`~openeo.rest.job.JobResults` object or path to directory with downloaded assets. + :param expected: expected job results, provided as :py:class:`~openeo.rest.job.BatchJob` object, + :py:class:`~openeo.rest.job.JobResults` object or path to directory with downloaded assets. + :param rtol: relative tolerance + :param atol: absolute tolerance + :param tmp_path: root temp path to download results if needed. + It's recommended to pass pytest's `tmp_path` fixture here + :raises AssertionError: if not equal within the given tolerance + + .. versionadded:: 0.31.0 + + .. warning:: + This function is experimental and subject to change. + """ + issues = _compare_job_results(actual, expected, rtol=rtol, atol=atol, tmp_path=tmp_path) + if issues: + raise AssertionError("\n".join(issues))
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/udf/debug.html b/_modules/openeo/udf/debug.html new file mode 100644 index 000000000..227a5ede6 --- /dev/null +++ b/_modules/openeo/udf/debug.html @@ -0,0 +1,157 @@ + + + + + + + openeo.udf.debug — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.udf.debug

+"""
+Debug utilities for UDFs
+"""
+import logging
+import os
+import sys
+
+_log = logging.getLogger(__name__)
+_user_log = logging.getLogger(os.environ.get("OPENEO_UDF_USER_LOGGER", f"{__name__}.user"))
+
+
+
+[docs] +def inspect(data=None, message: str = "", code: str = "User", level: str = "info"): + """ + Implementation of the openEO `inspect` process for UDF contexts. + + Note that it is up to the back-end implementation to properly capture this logging + and include it in the batch job logs. + + :param data: data to log (attached to the log record as ``data`` extra field) + :param message: message to send in addition to the data + :param code: A label to help identify one or more log entries (attached to the log record as ``code`` extra field) + :param level: The severity level of this message. Allowed values: "error", "warning", "info", "debug" + (upper-cased and looked up with ``logging.getLevelName``). + + .. versionadded:: 0.10.1 + + .. seealso:: :ref:`udf_logging_with_inspect` + """ + extra = {"data": data, "code": code} + kwargs = {"stacklevel": 2} if sys.version_info >= (3, 8) else {} + _user_log.log(level=logging.getLevelName(level.upper()), msg=message, extra=extra, **kwargs)
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/udf/run_code.html b/_modules/openeo/udf/run_code.html new file mode 100644 index 000000000..7cdc91c28 --- /dev/null +++ b/_modules/openeo/udf/run_code.html @@ -0,0 +1,458 @@ + + + + + + + openeo.udf.run_code — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.udf.run_code

+"""
+
+Note: this module was initially developed under the ``openeo-udf`` project (https://github.com/Open-EO/openeo-udf)
+"""
+
+import functools
+import inspect
+import logging
+import math
+import pathlib
+import re
+from typing import Callable, List, Union
+
+import numpy
+import pandas
+import shapely
+import xarray
+from pandas import Series
+
+import openeo
+from openeo import UDF
+from openeo.udf import OpenEoUdfException
+from openeo.udf._compat import tomllib
+from openeo.udf.feature_collection import FeatureCollection
+from openeo.udf.structured_data import StructuredData
+from openeo.udf.udf_data import UdfData
+from openeo.udf.xarraydatacube import XarrayDataCube
+
+_log = logging.getLogger(__name__)
+
+
+def _build_default_execution_context():
+    # TODO: is it really necessary to "pre-load" these modules? Isn't user going to import them explicitly in their script anyway?
+    context = {
+        "numpy": numpy, "np": numpy,
+        "xarray": xarray,
+        "pandas": pandas, "pd": pandas,
+        "shapely": shapely,
+        "math": math,
+        "UdfData": UdfData,
+        "XarrayDataCube": XarrayDataCube,
+        "DataCube": XarrayDataCube,  # Legacy alias
+        "StructuredData": StructuredData,
+        "FeatureCollection": FeatureCollection,
+        # "SpatialExtent": SpatialExtent,  # TODO?
+        # "MachineLearnModel": MachineLearnModelConfig, # TODO?
+    }
+
+
+    return context
+
+
+@functools.lru_cache(maxsize=100)
+def load_module_from_string(code: str) -> dict:
+    """
+    Experimental: avoid loading same UDF module more than once, to make caching inside the udf work.
+    @param code:
+    @return:
+    """
+    globals = _build_default_execution_context()
+    exec(code, globals)
+    return globals
+
+
+def _get_annotation_str(annotation: Union[str, type]) -> str:
+    """Get parameter annotation as a string"""
+    if isinstance(annotation, str):
+        return annotation
+    elif isinstance(annotation, type):
+        mod = annotation.__module__
+        return (mod + "." if mod != str.__module__ else "") + annotation.__name__
+    else:
+        return str(annotation)
+
+
+def _annotation_is_pandas_series(annotation) -> bool:
+    return annotation in {pandas.Series, _get_annotation_str(pandas.Series)}
+
+
+def _annotation_is_udf_datacube(annotation) -> bool:
+    return annotation is XarrayDataCube or _get_annotation_str(annotation) in {
+        _get_annotation_str(XarrayDataCube),
+        'openeo_udf.api.datacube.DataCube',  # Legacy `openeo_udf` annotation
+    }
+
+def _annotation_is_data_array(annotation) -> bool:
+    return annotation is xarray.DataArray or _get_annotation_str(annotation) in {
+        _get_annotation_str(xarray.DataArray)
+    }
+
+def _annotation_is_udf_data(annotation) -> bool:
+    return annotation is UdfData or _get_annotation_str(annotation) in {
+        _get_annotation_str(UdfData),
+        'openeo_udf.api.udf_data.UdfData'  # Legacy `openeo_udf` annotation
+    }
+
+
+def _apply_timeseries_xarray(array: xarray.DataArray, callback: Callable[[Series], Series]) -> xarray.DataArray:
+    """
+    Apply timeseries callback to given xarray data array
+    along its time dimension (named "t" or "time")
+
+    :param array: array to transform
+    :param callback: function that transforms a timeseries in another (same size)
+    :return: transformed array
+    """
+    # Make time dimension the last one, and flatten the rest
+    # to create a 1D sequence of input time series (also 1D).
+    [time_position] = [i for (i, d) in enumerate(array.dims) if d in ["t", "time"]]
+    input_series = numpy.moveaxis(array.values, time_position, -1)
+    orig_shape = input_series.shape
+    input_series = input_series.reshape((-1, input_series.shape[-1]))
+
+    applied = numpy.asarray([callback(s) for s in input_series])
+
+    # Reshape to original shape
+    applied = applied.reshape(orig_shape)
+    applied = numpy.moveaxis(applied, -1, time_position)
+    assert applied.shape == array.shape
+
+    return xarray.DataArray(applied, coords=array.coords, dims=array.dims, name=array.name)
+
+
+def apply_timeseries_generic(
+        udf_data: UdfData,
+        callback: Callable[[Series, dict], Series]
+) -> UdfData:
+    """
+    Implements the UDF contract by calling a user provided time series transformation function.
+
+    :param udf_data:
+    :param callback: callable that takes a pandas Series and context dict and returns a pandas Series.
+        See template :py:func:`openeo.udf.udf_signatures.apply_timeseries`
+    :return:
+    """
+    callback = functools.partial(callback, context=udf_data.user_context)
+    datacubes = [
+        XarrayDataCube(_apply_timeseries_xarray(array=cube.array, callback=callback))
+        for cube in udf_data.get_datacube_list()
+    ]
+    # Insert the new tiles as list of raster collection tiles in the input object. The new tiles will
+    # replace the original input tiles.
+    udf_data.set_datacube_list(datacubes)
+    return udf_data
+
+
+def run_udf_code(code: str, data: UdfData) -> UdfData:
+    # TODO: current implementation uses first match directly, first check for multiple matches?
+    module = load_module_from_string(code)
+    functions = ((k, v) for (k, v) in module.items() if callable(v))
+
+    for (fn_name, func) in functions:
+        try:
+            sig = inspect.signature(func)
+        except ValueError:
+            continue
+        params = sig.parameters
+        first_param = next(iter(params.values()), None)
+
+        if (
+                fn_name == 'apply_timeseries'
+                and 'series' in params and 'context' in params
+                and _annotation_is_pandas_series(params["series"].annotation)
+                and _annotation_is_pandas_series(sig.return_annotation)
+        ):
+            _log.info("Found timeseries mapping UDF `{n}` {f!r}".format(n=fn_name, f=func))
+            return apply_timeseries_generic(data, func)
+        elif (
+                fn_name in ['apply_hypercube', 'apply_datacube']
+                and 'cube' in params and 'context' in params
+                and _annotation_is_udf_datacube(params["cube"].annotation)
+                and _annotation_is_udf_datacube(sig.return_annotation)
+        ):
+            _log.info("Found datacube mapping UDF `{n}` {f!r}".format(n=fn_name, f=func))
+            if len(data.get_datacube_list()) != 1:
+                raise ValueError("The provided UDF expects exactly one datacube, but {c} were provided.".format(
+                    c=len(data.get_datacube_list())
+                ))
+            # TODO: also support calls without user context?
+            result_cube = func(cube=data.get_datacube_list()[0], context=data.user_context)
+            data.set_datacube_list([result_cube])
+            return data
+        elif (
+                fn_name in ['apply_datacube']
+                and 'cube' in params and 'context' in params
+                and _annotation_is_data_array(params["cube"].annotation)
+                and _annotation_is_data_array(sig.return_annotation)
+        ):
+            _log.info("Found datacube mapping UDF `{n}` {f!r}".format(n=fn_name, f=func))
+            if len(data.get_datacube_list()) != 1:
+                raise ValueError("The provided UDF expects exactly one datacube, but {c} were provided.".format(
+                    c=len(data.get_datacube_list())
+                ))
+            # TODO: also support calls without user context?
+            result_cube: xarray.DataArray = func(cube=data.get_datacube_list()[0].get_array(), context=data.user_context)
+            data.set_datacube_list([XarrayDataCube(result_cube)])
+            return data
+        elif (
+            fn_name in ["apply_vectorcube"]
+            and "geometries" in params
+            and _get_annotation_str(params["geometries"].annotation) == "geopandas.geodataframe.GeoDataFrame"
+            and "cube" in params
+            and _annotation_is_data_array(params["cube"].annotation)
+        ):
+            if data.get_feature_collection_list is None or data.get_datacube_list() is None:
+                raise ValueError(
+                    "The provided UDF expects a FeatureCollection and a datacube, but received {f} and {c}".format(
+                        f=data.get_feature_collection_list(), c=data.get_datacube_list()
+                    )
+                )
+            if len(data.get_feature_collection_list()) != 1:
+                raise ValueError(
+                    "The provided UDF expects exactly one FeatureCollection, but {c} were provided.".format(
+                        c=len(data.get_feature_collection_list())
+                    )
+                )
+            if len(data.get_datacube_list()) != 1:
+                raise ValueError(
+                    "The provided UDF expects exactly one datacube, but {c} were provided.".format(
+                        c=len(data.get_datacube_list())
+                    )
+                )
+            # TODO: geopandas is optional dependency.
+            input_geoms = data.get_feature_collection_list()[0].data
+            input_cube = data.get_datacube_list()[0].get_array()
+            result_geoms, result_cube = func(geometries=input_geoms, cube=input_cube, context=data.user_context)
+            data.set_datacube_list([XarrayDataCube(result_cube)])
+            data.set_feature_collection_list([FeatureCollection(id="udf_result", data=result_geoms)])
+            return data
+        elif len(params) == 1 and _annotation_is_udf_data(first_param.annotation):
+            _log.info("Found generic UDF `{n}` {f!r}".format(n=fn_name, f=func))
+            func(data)
+            return data
+
+    raise OpenEoUdfException("No UDF found.")
+
+
+
+[docs] +def execute_local_udf(udf: Union[str, openeo.UDF], datacube: Union[str, xarray.DataArray, XarrayDataCube], fmt='netcdf'): + """ + Locally executes a user-defined function on a previously downloaded datacube. + + :param udf: the code of the user-defined function (as string or :py:class:`openeo.UDF` object) + :param datacube: the path to the downloaded data on disk, an ``xarray.DataArray`` or an ``XarrayDataCube`` + :param fmt: format of the file if datacube is a path + :return: the resulting ``UdfData`` object, as returned by ``run_udf_code`` + """ + if isinstance(udf, openeo.UDF): + udf = udf.code + + if isinstance(datacube, (str, pathlib.Path)): + d = XarrayDataCube.from_file(path=datacube, fmt=fmt) + elif isinstance(datacube, XarrayDataCube): + d = datacube + elif isinstance(datacube, xarray.DataArray): + d = XarrayDataCube(datacube) + else: + raise ValueError(datacube) + d_array = d.get_array() + expected_order = ("t", "bands", "y", "x") + dims = [d for d in expected_order if d in d_array.dims] + + # TODO #472: skip going through XarrayDataCube above, we only need xarray.DataArray here anyway. + d = XarrayDataCube( + d_array.transpose(*dims) + # TODO: this float conversion was in original implementation (0962e00e03) but is that actually necessary? + .astype(numpy.float64) + ) + # wrap to udf_data + udf_data = UdfData(datacube_list=[d]) + + # TODO: enrich to other types like time series, vector data,... probably by adding named arguments + # signature: UdfData(proj, datacube_list, feature_collection_list, structured_data_list, ml_model_list, metadata) + + # run the udf through the same routine as it would have been parsed in the backend + result = run_udf_code(udf, udf_data) + return result
+ + + +
+[docs] +def extract_udf_dependencies(udf: Union[str, UDF]) -> Union[List[str], None]: + """ + Extract dependencies from UDF code declared in a top-level comment block + following the `inline script metadata specification (PEP 723) <https://packaging.python.org/en/latest/specifications/inline-script-metadata>`_. + + Basic example UDF snippet declaring expected dependencies as embedded metadata + in a comment block: + + .. code-block:: python + + # /// script + # dependencies = [ + # "geojson", + # ] + # /// + + import geojson + + def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray: + ... + + .. seealso:: :ref:`python-udf-dependency-declaration` for more in-depth information. + + :param udf: UDF code as a string or :py:class:`~openeo.rest._datacube.UDF` object + :return: List of extracted dependencies or ``None`` when no valid metadata block with dependencies was found. + + .. versionadded:: 0.30.0 + """ + udf_code = udf.code if isinstance(udf, UDF) else udf + + # Extract "script" blocks + script_type = "script" + block_regex = re.compile( + r"^# /// (?P<type>[a-zA-Z0-9-]+)\s*$\s(?P<content>(^#(| .*)$\s)+)^# ///$", flags=re.MULTILINE + ) + script_blocks = [ + match.group("content") for match in block_regex.finditer(udf_code) if match.group("type") == script_type + ] + + if len(script_blocks) > 1: + raise ValueError(f"Multiple {script_type!r} blocks found in top-level comment") + elif len(script_blocks) == 0: + return None + + # Extract dependencies from "script" block + content = "".join( + line[2:] if line.startswith("# ") else line[1:] for line in script_blocks[0].splitlines(keepends=True) + ) + + return tomllib.loads(content).get("dependencies")
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/udf/structured_data.html b/_modules/openeo/udf/structured_data.html new file mode 100644 index 000000000..dad884749 --- /dev/null +++ b/_modules/openeo/udf/structured_data.html @@ -0,0 +1,174 @@ + + + + + + + openeo.udf.structured_data — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.udf.structured_data

+"""
+
+"""
+
+# Note: this module was initially developed under the ``openeo-udf`` project (https://github.com/Open-EO/openeo-udf)
+
+from __future__ import annotations
+
+import builtins
+from typing import Union
+
+
+
+[docs] +class StructuredData: + """ + This class represents structured data that is produced by an UDF and can not be represented + as a raster or vector data cube. For example: the result of a statistical + computation. + + Usage example:: + + >>> StructuredData([3, 5, 8, 13]) + >>> StructuredData({"mean": 5, "median": 8}) + >>> StructuredData([('col_1', 'col_2'), (1, 2), (2, 3)], type="table") + """ + + def __init__(self, data: Union[list, dict], description: str = None, type: str = None): + self.data = data + self.type = type or builtins.type(data).__name__ + self.description = description or self.type + + def __repr__(self): + return f"<{type(self).__name__} with {self.type}>" + + def to_dict(self) -> dict: + return dict( + data=self.data, + description=self.description, + type=self.type, + ) + + @classmethod + def from_dict(cls, data: dict) -> StructuredData: + return cls( + data=data["data"], + description=data.get("description"), + type=data.get("type") + )
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/udf/udf_data.html b/_modules/openeo/udf/udf_data.html new file mode 100644 index 000000000..fa51b7140 --- /dev/null +++ b/_modules/openeo/udf/udf_data.html @@ -0,0 +1,283 @@ + + + + + + + openeo.udf.udf_data — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.udf.udf_data

+"""
+
+"""
+
+# Note: this module was initially developed under the ``openeo-udf`` project (https://github.com/Open-EO/openeo-udf)
+
+from __future__ import annotations
+
+from typing import List, Optional, Union
+
+from openeo.udf.feature_collection import FeatureCollection
+from openeo.udf.structured_data import StructuredData
+from openeo.udf.xarraydatacube import XarrayDataCube
+
+
+
+[docs] +class UdfData: + """ + Container for data passed to a user defined function (UDF) + """ + + # TODO: original implementation in `openeo_udf` project had `get_datacube_by_id`, `get_feature_collection_by_id`: is it still useful to provide this? + # TODO: original implementation in `openeo_udf` project had `server_context`: is it still useful to provide this? + + def __init__( + self, + proj: dict = None, + datacube_list: Optional[List[XarrayDataCube]] = None, + feature_collection_list: Optional[List[FeatureCollection]] = None, + structured_data_list: Optional[List[StructuredData]] = None, + user_context: Optional[dict] = None, + ): + """ + The constructor of the UDF argument class that stores all data required by the + user defined function. + + :param proj: A dictionary of form {"proj type string": "projection description"} e.g. {"EPSG": 4326} + :param datacube_list: A list of data cube objects + :param feature_collection_list: A list of VectorTile objects + :param structured_data_list: A list of structured data objects + """ + self.datacube_list = datacube_list + self.feature_collection_list = feature_collection_list + self.structured_data_list = structured_data_list + self.proj = proj + self._user_context = user_context or {} + + def __repr__(self) -> str: + fields = " ".join( + f"{f}:{getattr(self, f)!r}" for f in + ["datacube_list", "feature_collection_list", "structured_data_list"] + ) + return f"<{type(self).__name__} {fields}>" + + @property + def user_context(self) -> dict: + """Return the user context that was passed to the run_udf function""" + return self._user_context + +
+[docs] + def get_datacube_list(self) -> Union[List[XarrayDataCube], None]: + """Get the data cube list""" + return self._datacube_list
+ + +
+[docs] + def set_datacube_list(self, datacube_list: Union[List[XarrayDataCube], None]): + """ + Set the data cube list + + :param datacube_list: A list of data cubes + """ + self._datacube_list = datacube_list
+ + + datacube_list = property(fget=get_datacube_list, fset=set_datacube_list) + +
+[docs] + def get_feature_collection_list(self) -> Union[List[FeatureCollection], None]: + """get all feature collections as list""" + return self._feature_collection_list
+ + + def set_feature_collection_list(self, feature_collection_list: Union[List[FeatureCollection], None]): + self._feature_collection_list = feature_collection_list + + feature_collection_list = property(fget=get_feature_collection_list, fset=set_feature_collection_list) + +
+[docs] + def get_structured_data_list(self) -> Union[List[StructuredData], None]: + """ + Get all structured data entries + + :return: A list of StructuredData objects + """ + return self._structured_data_list
+ + +
+[docs] + def set_structured_data_list(self, structured_data_list: Union[List[StructuredData], None]): + """ + Set the list of structured data + + :param structured_data_list: A list of StructuredData objects + """ + self._structured_data_list = structured_data_list
+ + + structured_data_list = property(fget=get_structured_data_list, fset=set_structured_data_list) + +
+[docs] + def to_dict(self) -> dict: + """ + Convert this UdfData object into a dictionary that can be converted into + a valid JSON representation + """ + return { + "datacubes": [x.to_dict() for x in self.datacube_list] \ + if self.datacube_list else None, + "feature_collection_list": [x.to_dict() for x in self.feature_collection_list] \ + if self.feature_collection_list else None, + "structured_data_list": [x.to_dict() for x in self.structured_data_list] \ + if self.structured_data_list else None, + "proj": self.proj, + "user_context": self.user_context, + }
+ + +
+[docs] + @classmethod + def from_dict(cls, udf_dict: dict) -> UdfData: + """ + Create a udf data object from a python dictionary that was created from + the JSON definition of the UdfData class + + :param udf_dict: The dictionary that contains the udf data definition + """ + + datacubes = [XarrayDataCube.from_dict(x) for x in udf_dict.get("datacubes", [])] + feature_collection_list = [FeatureCollection.from_dict(x) for x in udf_dict.get("feature_collection_list", [])] + structured_data_list = [StructuredData.from_dict(x) for x in udf_dict.get("structured_data_list", [])] + udf_data = cls( + proj=udf_dict.get("proj"), + datacube_list=datacubes, + feature_collection_list=feature_collection_list, + structured_data_list=structured_data_list, + user_context=udf_dict.get("user_context") + ) + return udf_data
+
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/udf/udf_signatures.html b/_modules/openeo/udf/udf_signatures.html new file mode 100644 index 000000000..a6e94580b --- /dev/null +++ b/_modules/openeo/udf/udf_signatures.html @@ -0,0 +1,248 @@ + + + + + + + openeo.udf.udf_signatures — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.udf.udf_signatures

+"""
+This module defines a number of function signatures that can be implemented by UDFs.
+Both the name of the function and the argument types are/can be used by the backend to validate if the provided UDF
+is compatible with the calling context of the process graph in which it is used.
+
+"""
+# Note: this module was initially developed under the ``openeo-udf`` project (https://github.com/Open-EO/openeo-udf)
+
+import xarray
+from pandas import Series
+
+from openeo.metadata import CollectionMetadata
+from openeo.udf.udf_data import UdfData
+from openeo.udf.xarraydatacube import XarrayDataCube
+
+try:
+    # Geopandas is an optional dependency, but one of the signatures uses it as type annotation
+    import geopandas
+except ImportError:
+    pass
+
+
+
+[docs] +def apply_timeseries(series: Series, context: dict) -> Series: + """ + Process a timeseries of values, without changing the time instants. + + This can for instance be used for smoothing or gap-filling. + + :param series: A Pandas Series object with a date-time index. + :param context: A dictionary containing user context. + :return: A Pandas Series object with the same datetime index. + """ + # TODO: do we need geospatial coordinates for the series? + return series
+ + + +
+[docs] +def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube: + """ + Map a :py:class:`XarrayDataCube` to another :py:class:`XarrayDataCube`. + + Depending on the context in which this function is used, the :py:class:`XarrayDataCube` dimensions + have to be retained or can be changed. + For instance, in the context of a reducing operation along a dimension, + that dimension will have to be reduced to a single value. + In the context of a 1 to 1 mapping operation, all dimensions have to be retained. + + :param cube: input data cube + :param context: A dictionary containing user context. + :return: output data cube + """ + return cube
+ + + +
+[docs] +def apply_udf_data(data: UdfData): + """ + Generic UDF function that directly manipulates a :py:class:`UdfData` object + + :param data: :py:class:`UdfData` object to manipulate in-place + """ + pass
+ + + +
+[docs] +def apply_metadata(metadata: CollectionMetadata, context: dict) -> CollectionMetadata: + """ + .. warning:: + This signature is not yet fully standardized and subject to change. + + Returns the expected cube metadata, after applying this UDF, based on input metadata. + The provided metadata represents the whole raster or vector cube. This function does not need to be called for every data chunk. + + When this function is not implemented by the UDF, the backend may still be able to infer correct metadata by running the + UDF, but this can result in reduced performance or errors. + + This function does not need to be provided when using the UDF in combination with processes that by design have a clear + effect on cube metadata, such as :py:meth:`~openeo.rest.datacube.DataCube.reduce_dimension()` + + :param metadata: the collection metadata of the input data cube + :param context: A dictionary containing user context. + + :return: output metadata: the expected metadata of the cube, after applying the udf + + Examples + -------- + + An example for a UDF that is applied on the 'bands' dimension, and returns a new set of bands with different labels. + + >>> def apply_metadata(metadata: CollectionMetadata, context: dict) -> CollectionMetadata: + ... return metadata.rename_labels( + ... dimension="bands", + ... target=["computed_band_1", "computed_band_2"] + ... ) + + """ + pass
+ + + +
+[docs] +def apply_vectorcube( + geometries: "geopandas.geodataframe.GeoDataFrame", cube: xarray.DataArray, context: dict +) -> ("geopandas.geodataframe.GeoDataFrame", xarray.DataArray): + """ + Map a vector cube to another vector cube. + + :param geometries: input geometries as a geopandas.GeoDataFrame. This contains the actual shapely geometries and optional properties. + :param cube: a data cube with dimensions (geometries, time, bands) where time and bands are optional. + The coordinates for the geometry dimension are integers and match the index of the geometries in the geometries parameter. + :param context: A dictionary containing user context. + :return: output geometries, output data cube + """ + pass
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/udf/xarraydatacube.html b/_modules/openeo/udf/xarraydatacube.html new file mode 100644 index 000000000..b9baa1192 --- /dev/null +++ b/_modules/openeo/udf/xarraydatacube.html @@ -0,0 +1,526 @@ + + + + + + + openeo.udf.xarraydatacube — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.udf.xarraydatacube

+"""
+
+"""
+
+# Note: this module was initially developed under the ``openeo-udf`` project (https://github.com/Open-EO/openeo-udf)
+
+from __future__ import annotations
+
+import collections
+import json
+import typing
+from pathlib import Path
+from typing import Optional, Union
+
+import numpy
+import xarray
+
+from openeo.udf import OpenEoUdfException
+from openeo.util import deep_get, dict_no_none
+
+if typing.TYPE_CHECKING:
+    # Imports for type checking only (circular import issue at runtime).
+    import matplotlib.colors
+
+
+
+[docs] +class XarrayDataCube: + """ + This is a thin wrapper around :py:class:`xarray.DataArray` + providing a basic "DataCube" interface for openEO UDF usage around multi-dimensional data. + """ + + # TODO #472 This class, just wrapping an array.DataArray, seems to make things more complicated/confusing than necessary. + + def __init__(self, array: xarray.DataArray): + if not isinstance(array, xarray.DataArray): + raise OpenEoUdfException("Argument data must be of type xarray.DataArray") + self._array = array + + def __repr__(self): + return f"<{type(self).__name__} shape:{self._array.shape}>" + +
+[docs] + def get_array(self) -> xarray.DataArray: + """ + Get the :py:class:`xarray.DataArray` that contains the data and dimension definition + """ + return self._array
+ + + array = property(fget=get_array) + + @property + def id(self): + return self._array.name + +
+[docs] + def to_dict(self) -> dict: + """ + Convert this hypercube into a dictionary that can be converted into + a valid JSON representation + + >>> example = { + ... "id": "test_data", + ... "data": [ + ... [[0.0, 0.1], [0.2, 0.3]], + ... [[0.0, 0.1], [0.2, 0.3]], + ... ], + ... "dimension": [ + ... {"name": "time", "coordinates": ["2001-01-01", "2001-01-02"]}, + ... {"name": "X", "coordinates": [50.0, 60.0]}, + ... {"name": "Y"}, + ... ], + ... } + """ + xd = self._array.to_dict() + return dict_no_none({ + "id": xd.get("name"), + "data": xd.get("data"), + "description": deep_get(xd, "attrs", "description", default=None), + "dimensions": [ + dict_no_none( + name=dim, + coordinates=deep_get(xd, "coords", dim, "data", default=None) + ) + for dim in xd.get("dims", []) + ] + })
+ + +
+[docs] + @classmethod + def from_dict(cls, xdc_dict: dict) -> XarrayDataCube: + """ + Create a :py:class:`XarrayDataCube` from a Python dictionary that was created from + the JSON definition of the data cube + + :param xdc_dict: The dictionary that contains the data cube definition + """ + + if "data" not in xdc_dict: + raise OpenEoUdfException("Missing data in dictionary") + + data = numpy.asarray(xdc_dict["data"]) + + if "dimensions" in xdc_dict: + dims = [dim["name"] for dim in xdc_dict["dimensions"]] + coords = {dim["name"]: dim["coordinates"] for dim in xdc_dict["dimensions"] if "coordinates" in dim} + else: + dims = None + coords = None + + x = xarray.DataArray(data, dims=dims, coords=coords, name=xdc_dict.get("id")) + + if "description" in xdc_dict: + x.attrs["description"] = xdc_dict["description"] + + return cls(array=x)
+ + + @staticmethod + def _guess_format(path: Union[str, Path]) -> str: + """Guess file format from file name.""" + suffix = Path(path).suffix.lower() + if suffix in [".nc", ".netcdf"]: + return "netcdf" + elif suffix in [".json"]: + return "json" + else: + raise ValueError("Can not guess format of {p}".format(p=path)) + +
+[docs] + @classmethod + def from_file(cls, path: Union[str, Path], fmt=None, **kwargs) -> XarrayDataCube: + """ + Load data file as :py:class:`XarrayDataCube` in memory + + :param path: the file on disk + :param fmt: format to load from, e.g. "netcdf" or "json" + (will be auto-detected when not specified) + + :return: loaded data cube + """ + fmt = fmt or cls._guess_format(path) + if fmt.lower() == 'netcdf': + return cls(array=XarrayIO.from_netcdf_file(path=path, **kwargs)) + elif fmt.lower() == 'json': + return cls(array=XarrayIO.from_json_file(path=path)) + else: + raise ValueError("invalid format {f}".format(f=fmt))
+ + +
+[docs] + def save_to_file(self, path: Union[str, Path], fmt=None, **kwargs): + """ + Store :py:class:`XarrayDataCube` to file + + :param path: destination file on disk + :param fmt: format to save as, e.g. "netcdf" or "json" + (will be auto-detected when not specified) + """ + fmt = fmt or self._guess_format(path) + if fmt.lower() == 'netcdf': + XarrayIO.to_netcdf_file(array=self.get_array(), path=path, **kwargs) + elif fmt.lower() == 'json': + XarrayIO.to_json_file(array=self.get_array(), path=path) + else: + raise ValueError(fmt)
+ + +
+[docs] + def plot( + self, + title: str = None, + limits=None, + show_bandnames: bool = True, + show_dates: bool = True, + show_axeslabels: bool = False, + fontsize: float = 10., + oversample: float = 1, + cmap: Union[str, 'matplotlib.colors.Colormap'] = 'RdYlBu_r', + cbartext: str = None, + to_file: str = None, + to_show: bool = True + ): + """ + Visualize a :py:class:`XarrayDataCube` with matplotlib + + :param datacube: data to plot + :param title: title text drawn in the top left corner (default: nothing) + :param limits: range of the contour plot as a tuple(min,max) (default: None, in which case the min/max is computed from the data) + :param show_bandnames: whether to plot the column names (default: True) + :param show_dates: whether to show the dates for each row (default: True) + :param show_axeslabels: whether to show the labels on the axes (default: False) + :param fontsize: font size in pixels (default: 10) + :param oversample: one value is plotted into oversample x oversample number of pixels (default: 1 which means each value is plotted as a single pixel) + :param cmap: built-in matplotlib color map name or ColorMap object (default: RdYlBu_r which is a blue-yellow-red rainbow) + :param cbartext: text on top of the legend (default: nothing) + :param to_file: filename to save the image to (default: None, which means no file is generated) + :param to_show: whether to show the image in a matplotlib window (default: True) + + :return: None + """ + from matplotlib import pyplot + + data = self.get_array() + if limits is None: + vmin = data.min() + vmax = data.max() + else: + vmin = limits[0] + vmax = limits[1] + + # fill bands and t if missing + if 'bands' not in data.dims: + data = data.expand_dims(dim={'bands': ['band0']}) + if 't' not in data.dims: + data = data.expand_dims(dim={'t': [numpy.datetime64('today')]}) + if 'bands' not in data.coords: + data['bands'] = ['band0'] + if 't' not in data.coords: + data['t'] = [numpy.datetime64('today')] + + # align 
with plot + data = data.transpose('t', 'bands', 'y', 'x') + dpi = 100 + xres = len(data.x) / dpi + yres = len(data.y) / dpi + fs = fontsize / oversample + frame = 0.33 + + nrow = data.shape[0] + ncol = data.shape[1] + + fig = pyplot.figure(figsize=((ncol + frame) * xres * 1.1, (nrow + frame) * yres), dpi=int(dpi * oversample)) + gs = pyplot.GridSpec(nrow, ncol, wspace=0., hspace=0., top=nrow / (nrow + frame), bottom=0., + left=frame / (ncol + frame), right=1.) + + xmin = data.x.min() + xmax = data.x.max() + ymin = data.y.min() + ymax = data.y.max() + + # flip around if incorrect, this is in harmony with origin='lower' + if (data.x[0] > data.x[-1]): + data = data.reindex(x=list(reversed(data.x))) + if (data.y[0] > data.y[-1]): + data = data.reindex(y=list(reversed(data.y))) + + extent = (data.x[0], data.x[-1], data.y[0], data.y[-1]) + + for i in range(nrow): + for j in range(ncol): + im = data[i, j] + ax = pyplot.subplot(gs[i, j]) + ax.set_xlim(xmin, xmax) + ax.set_ylim(ymin, ymax) + img = ax.imshow(im, vmin=vmin, vmax=vmax, cmap=cmap, origin='lower', extent=extent) + ax.xaxis.set_tick_params(labelsize=fs) + ax.yaxis.set_tick_params(labelsize=fs) + if not show_axeslabels: + ax.set_axis_off() + ax.set_xticklabels([]) + ax.set_yticklabels([]) + if show_bandnames: + if i == 0: ax.text(0.5, 1.08, data.bands.values[j] + " (" + str(data.dtype) + ")", size=fs, + va="center", + ha="center", transform=ax.transAxes) + if show_dates: + if j == 0: ax.text(-0.08, 0.5, data.t.dt.strftime("%Y-%m-%d").values[i], size=fs, va="center", + ha="center", rotation=90, transform=ax.transAxes) + + if title is not None: + fig.text(0., 1., title.split('/')[-1], size=fs, va="top", ha="left", weight='bold') + + cbar_ax = fig.add_axes([0.01, 0.1, 0.04, 0.5]) + if cbartext is not None: + fig.text(0.06, 0.62, cbartext, size=fs, va="bottom", ha="center") + cbar = fig.colorbar(img, cax=cbar_ax) + cbar.ax.tick_params(labelsize=fs) + cbar.outline.set_visible(False) + cbar.ax.tick_params(size=0) + 
cbar.ax.yaxis.set_tick_params(pad=0) + + if to_file is not None: + pyplot.savefig(str(to_file)) + if to_show: + pyplot.show() + + pyplot.close()
+
+ + + +class XarrayIO: + """ + Helpers to load/store :py:class:`xarray.DataArray` objects, + with some conventions about expected dimensions/bands + """ + + @classmethod + def from_json_file(cls, path: Union[str, Path]) -> xarray.DataArray: + with Path(path).open() as f: + return cls.from_json(json.load(f)) + + @classmethod + def from_json(cls, d: dict) -> xarray.DataArray: + d['data'] = numpy.array(d['data'], dtype=numpy.dtype(d['attrs']['dtype'])) + for k, v in d['coords'].items(): + # prepare coordinate + d['coords'][k]['data'] = numpy.array(v['data'], dtype=v['attrs']['dtype']) + # remove dtype and shape, because that is included for helping the user + if d['coords'][k].get('attrs', None) is not None: + d['coords'][k]['attrs'].pop('dtype', None) + d['coords'][k]['attrs'].pop('shape', None) + + # remove dtype and shape, because that is included for helping the user + if d.get('attrs', None) is not None: + d['attrs'].pop('dtype', None) + d['attrs'].pop('shape', None) + # convert to xarray + r = xarray.DataArray.from_dict(d) + + # build dimension list in proper order + dims = list(filter(lambda i: i != 't' and i != 'bands' and i != 'x' and i != 'y', r.dims)) + if 't' in r.dims: dims += ['t'] + if 'bands' in r.dims: dims += ['bands'] + if 'x' in r.dims: dims += ['x'] + if 'y' in r.dims: dims += ['y'] + # return the resulting data array + return r.transpose(*dims) + + @classmethod + def from_netcdf_file(cls, path: Union[str, Path], engine: Optional[str] = None) -> xarray.DataArray: + # load the dataset and convert to data array + ds = xarray.open_dataset(path, engine=engine) + + # Skip non-numerical variables (like "crs") + band_vars = [k for k, v in ds.data_vars.items() if v.dtype.kind in {"b", "i", "u", "f"} and len(v.dims) > 0] + ds = ds[band_vars] + + r = ds.to_array(dim='bands') + + # Reorder dims to proper order (t-bands-x-y at the end) + expected_order = ("t", "bands", "x", "y") + dims = [d for d in r.dims if d not in expected_order] + [d for d in 
expected_order if d in r.dims] + + return r.transpose(*dims) + + @classmethod + def to_json_file(cls, array: xarray.DataArray, path: Union[str, Path]): + # to deserialized json + jsonarray = array.to_dict() + # add attributes that needed for re-creating xarray from json + jsonarray['attrs']['dtype'] = str(array.values.dtype) + jsonarray['attrs']['shape'] = list(array.values.shape) + for i in array.coords.values(): + jsonarray['coords'][i.name]['attrs']['dtype'] = str(i.dtype) + jsonarray['coords'][i.name]['attrs']['shape'] = list(i.shape) + # custom print so resulting json file is humanly easy to read + # TODO: make this human friendly JSON format optional and allow compact JSON too. + with Path(path).open("w", encoding="utf-8") as f: + def custom_print(data_structure, indent=1): + f.write("{\n") + needs_comma = False + for key, value in data_structure.items(): + if needs_comma: + f.write(',\n') + needs_comma = True + f.write(' ' * indent + json.dumps(key) + ':') + if isinstance(value, dict): + custom_print(value, indent + 1) + else: + json.dump(value, f, default=str, separators=(',', ':')) + f.write('\n' + ' ' * (indent - 1) + "}") + + custom_print(jsonarray) + + @classmethod + def to_netcdf_file(cls, array: xarray.DataArray, path: Union[str, Path], engine: Optional[str] = None): + # temp reference to avoid modifying the original array + result = array + # rearrange in a basic way because older xarray versions have a bug and ellipsis don't work in xarray.transpose() + if result.dims[-2] == 'x' and result.dims[-1] == 'y': + l = list(result.dims[:-2]) + result = result.transpose(*(l + ['y', 'x'])) + # turn it into a dataset where each band becomes a variable + if not 'bands' in result.dims: + result = result.expand_dims(dim=collections.OrderedDict({'bands': ['band_0']})) + else: + if not 'bands' in result.coords: + labels = ['band_' + str(i) for i in range(result.shape[result.dims.index('bands')])] + result = result.assign_coords(bands=labels) + result = 
result.to_dataset('bands') + result.to_netcdf(path, engine=engine) +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_modules/openeo/util.html b/_modules/openeo/util.html new file mode 100644 index 000000000..eb3d697e7 --- /dev/null +++ b/_modules/openeo/util.html @@ -0,0 +1,831 @@ + + + + + + + openeo.util — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Source code for openeo.util

+"""
+Various utilities and helpers.
+"""
+
+# TODO #465 split this kitchen-sink in thematic submodules
+
+from __future__ import annotations
+
+import datetime as dt
+import functools
+import json
+import logging
+import re
+import sys
+import time
+from collections import OrderedDict
+from enum import Enum
+from pathlib import Path
+from typing import Any, Callable, List, Optional, Tuple, Union
+from urllib.parse import urljoin
+
+import requests
+import shapely.geometry.base
+from deprecated import deprecated
+
+try:
+    # pyproj is an optional dependency
+    import pyproj
+except ImportError:
+    pyproj = None
+
+
+logger = logging.getLogger(__name__)
+
+
+class Rfc3339:
+    """
+    Formatter for dates according to RFC-3339.
+
+    Parses date(time)-like input and formats according to RFC-3339. Some examples:
+
+        >>> rfc3339.date("2020:03:17")
+        "2020-03-17"
+        >>> rfc3339.date(2020, 3, 17)
+        "2020-03-17"
+        >>> rfc3339.datetime("2020/03/17/12/34/56")
+        "2020-03-17T12:34:56Z"
+        >>> rfc3339.datetime([2020, 3, 17, 12, 34, 56])
+        "2020-03-17T12:34:56Z"
+        >>> rfc3339.datetime(2020, 3, 17)
+        "2020-03-17T00:00:00Z"
+        >>> rfc3339.datetime(datetime(2020, 3, 17, 12, 34, 56))
+        "2020-03-17T12:34:56Z"
+
+    Or just normalize (automatically preserve date/datetime resolution):
+
+        >>> rfc3339.normalize("2020/03/17")
+        "2020-03-17"
+        >>> rfc3339.normalize("2020-03-17-12-34-56")
+        "2020-03-17T12:34:56Z"
+
+    Also see https://tools.ietf.org/html/rfc3339#section-5.6
+    """
+    # TODO: currently we hard code timezone 'Z' for simplicity. Add real time zone support?
+    _FMT_DATE = '%Y-%m-%d'
+    _FMT_TIME = '%H:%M:%SZ'
+    _FMT_DATETIME = _FMT_DATE + "T" + _FMT_TIME
+
+    _regex_datetime = re.compile(r"""
+        ^(?P<Y>\d{4})[:/_-](?P<m>\d{2})[:/_-](?P<d>\d{2})[T :/_-]?
+        (?:(?P<H>\d{2})[:/_-](?P<M>\d{2})(?:[:/_-](?P<S>\d{2}))?)?""", re.VERBOSE)
+
+    def __init__(self, propagate_none: bool = False):
+        self._propagate_none = propagate_none
+
+    def datetime(self, x: Any, *args) -> Union[str, None]:
+        """
+        Format given date(time)-like object as RFC-3339 datetime string.
+        """
+        if args:
+            return self.datetime((x,) + args)
+        elif isinstance(x, dt.datetime):
+            return self._format_datetime(x)
+        elif isinstance(x, dt.date):
+            return self._format_datetime(dt.datetime.combine(x, dt.time()))
+        elif isinstance(x, str):
+            return self._format_datetime(dt.datetime(*self._parse_datetime(x)))
+        elif isinstance(x, (tuple, list)):
+            return self._format_datetime(dt.datetime(*(int(v) for v in x)))
+        elif x is None and self._propagate_none:
+            return None
+        raise ValueError(x)
+
+    def date(self, x: Any, *args) -> Union[str, None]:
+        """
+        Format given date-like object as RFC-3339 date string.
+        """
+        if args:
+            return self.date((x,) + args)
+        elif isinstance(x, (dt.date, dt.datetime)):
+            return self._format_date(x)
+        elif isinstance(x, str):
+            return self._format_date(dt.datetime(*self._parse_datetime(x)))
+        elif isinstance(x, (tuple, list)):
+            return self._format_date(dt.datetime(*(int(v) for v in x)))
+        elif x is None and self._propagate_none:
+            return None
+        raise ValueError(x)
+
+    def normalize(self, x: Any, *args) -> Union[str, None]:
+        """
+        Format given date(time)-like object as RFC-3339 date or date-time string depending on given resolution
+
+            >>> rfc3339.normalize("2020/03/17")
+            "2020-03-17"
+            >>> rfc3339.normalize("2020/03/17/12/34/56")
+            "2020-03-17T12:34:56Z"
+        """
+        if args:
+            return self.normalize((x,) + args)
+        elif isinstance(x, dt.datetime):
+            return self.datetime(x)
+        elif isinstance(x, dt.date):
+            return self.date(x)
+        elif isinstance(x, str):
+            x = self._parse_datetime(x)
+            return self.date(x) if len(x) <= 3 else self.datetime(x)
+        elif isinstance(x, (tuple, list)):
+            return self.date(x) if len(x) <= 3 else self.datetime(x)
+        elif x is None and self._propagate_none:
+            return None
+        raise ValueError(x)
+
+    def parse_date(self, x: Union[str, None]) -> Union[dt.date, None]:
+        """Parse given string as RFC3339 date."""
+        if isinstance(x, str):
+            return dt.datetime.strptime(x, "%Y-%m-%d").date()
+        elif x is None and self._propagate_none:
+            return None
+        raise ValueError(x)
+
+    def parse_datetime(
+        self, x: Union[str, None], with_timezone: bool = False
+    ) -> Union[dt.datetime, None]:
+        """Parse given string as RFC3339 date-time."""
+        if isinstance(x, str):
+            # TODO: Also support parsing other timezones than UTC (Z)
+            if re.search(r":\d+\.\d+", x):
+                res = dt.datetime.strptime(x, "%Y-%m-%dT%H:%M:%S.%fZ")
+            else:
+                res = dt.datetime.strptime(x, "%Y-%m-%dT%H:%M:%SZ")
+            if with_timezone:
+                res = res.replace(tzinfo=dt.timezone.utc)
+            return res
+        elif x is None and self._propagate_none:
+            return None
+        raise ValueError(x)
+
+    def parse_date_or_datetime(
+        self, x: Union[str, None], with_timezone: bool = False
+    ) -> Union[dt.date, dt.datetime, None]:
+        """Parse given string as RFC3339 date or date-time."""
+        if isinstance(x, str):
+            if len(x) > 10:
+                return self.parse_datetime(x, with_timezone=with_timezone)
+            else:
+                return self.parse_date(x)
+        elif x is None and self._propagate_none:
+            return None
+        raise ValueError(x)
+
+    @classmethod
+    def _format_datetime(cls, d: dt.datetime) -> str:
+        """Format given datetime as RFC-3339 date-time string."""
+        if not (d.tzinfo is None or d.tzinfo.tzname(d) == "UTC"):
+            # TODO: add support for non-UTC timezones?
+            raise ValueError(f"No support for non-UTC timezone {d.tzinfo}")
+        return d.strftime(cls._FMT_DATETIME)
+
+    @classmethod
+    def _format_date(cls, d: dt.date) -> str:
+        """Format given datetime as RFC-3339 date-time string."""
+        return d.strftime(cls._FMT_DATE)
+
+    @classmethod
+    def _parse_datetime(cls, s: str) -> Tuple[int]:
+        """Try to parse string to a date(time) tuple"""
+        try:
+            return tuple(int(v) for v in cls._regex_datetime.match(s).groups() if v is not None)
+        except Exception:
+            raise ValueError("Can not parse as date: {s}".format(s=s))
+
+    def today(self) -> str:
+        """Today (date) in RFC3339 format"""
+        return self.date(dt.date.today())
+
+    def utcnow(self) -> str:
+        """Current UTC datetime in RFC3339 format."""
+        # Current time in UTC timezone (instead of naive `datetime.datetime.utcnow()`, per `datetime` documentation)
+        now = dt.datetime.now(tz=dt.timezone.utc)
+        return self.datetime(now)
+
+
+# Default RFC3339 date-time formatter: module-level `Rfc3339` instance built with default constructor arguments
+rfc3339 = Rfc3339()
+
+
+@deprecated("Use `rfc3339.normalize`, `rfc3339.date` or `rfc3339.datetime` instead")
+def date_to_rfc3339(d: Any) -> str:
+    """
+    Convert date-like object to a RFC 3339 formatted date string
+
+    see https://tools.ietf.org/html/rfc3339#section-5.6
+    """
+    return rfc3339.normalize(d)
+
+
+def dict_no_none(*args, **kwargs) -> dict:
+    """
+    Helper to build a dict containing given key-value pairs where the value is not None.
+    """
+    return {
+        k: v
+        for k, v in dict(*args, **kwargs).items()
+        if v is not None
+    }
+
+
+def first_not_none(*args):
+    """Return first item from given arguments that is not None."""
+    for item in args:
+        if item is not None:
+            return item
+    raise ValueError("No not-None values given.")
+
+
+def ensure_dir(path: Union[str, Path]) -> Path:
+    """Create directory if it doesn't exist."""
+    path = Path(path)
+    if not path.exists():
+        path.mkdir(parents=True, exist_ok=True)
+    assert path.is_dir()
+    return path
+
+
+def ensure_list(x):
+    """Convert given data structure to a list."""
+    try:
+        return list(x)
+    except TypeError:
+        return [x]
+
+
+class ContextTimer:
+    """
+    Context manager to measure the "wall clock" time (in seconds) inside/for a block of code.
+
+    Usage example:
+
+        with ContextTimer() as timer:
+            # Inside code block: currently elapsed time
+            print(timer.elapsed())
+
+        # Outside code block: elapsed time when block ended
+        print(timer.elapsed())
+
+    """
+
+    __slots__ = ["start", "end"]
+
+    # Function that returns current time in seconds (overridable for unit tests)
+    _clock = time.time
+
+    def __init__(self):
+        self.start = None
+        self.end = None
+
+    def elapsed(self) -> float:
+        """Elapsed time (in seconds) inside or at the end of wrapped context."""
+        if self.start is None:
+            raise RuntimeError("Timer not started.")
+        if self.end is not None:
+            # Elapsed time when exiting context.
+            return self.end - self.start
+        else:
+            # Currently elapsed inside context.
+            return self._clock() - self.start
+
+    def __enter__(self) -> ContextTimer:
+        self.start = self._clock()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.end = self._clock()
+
+
+class TimingLogger:
+    """
+    Context manager for quick and easy logging of start time, end time and elapsed time of some block of code
+
+    Usage example:
+
+    >>> with TimingLogger("Doing batch job"):
+    ...     do_batch_job()
+
+    At start of the code block the current time will be logged
+    and at end of the code block the end time and elapsed time will be logged.
+
+    Can also be used as a function/method decorator, for example:
+
+    >>> @TimingLogger("Calculation going on")
+    ... def add(x, y):
+    ...     return x + y
+    """
+
+    # Function that returns current datetime (overridable for unit tests)
+    _now = dt.datetime.now
+
+    def __init__(self, title: str = "Timing", logger: Union[logging.Logger, str, Callable] = logger):
+        """
+        :param title: the title to use in the logging
+        :param logger: how the timing should be logged.
+            Can be specified as a logging.Logger object (in which case the INFO log level will be used),
+            as a string (name of the logging.Logger object to construct),
+            or as callable (e.g. to use the `print` function, or the `.debug` method of an existing logger)
+        """
+        self.title = title
+        if isinstance(logger, str):
+            logger = logging.getLogger(logger)
+        if isinstance(logger, (logging.Logger, logging.LoggerAdapter)):
+            self._log = logger.info
+        elif callable(logger):
+            self._log = logger
+        else:
+            raise ValueError("Invalid logger {l!r}".format(l=logger))
+
+        self.start_time = self.end_time = self.elapsed = None
+
+    def __enter__(self):
+        self.start_time = self._now()
+        self._log("{t}: start {s}".format(t=self.title, s=self.start_time))
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.end_time = self._now()
+        self.elapsed = self.end_time - self.start_time
+        self._log("{t}: {s} {e}, elapsed {d}".format(
+            t=self.title,
+            s="fail" if exc_type else "end",
+            e=self.end_time, d=self.elapsed
+        ))
+
+    def __call__(self, f: Callable):
+        """
+        Use TimingLogger as function/method decorator
+        """
+
+        @functools.wraps(f)
+        def wrapper(*args, **kwargs):
+            with self:
+                return f(*args, **kwargs)
+
+        return wrapper
+
+
+class DeepKeyError(LookupError):
+    def __init__(self, key, keys):
+        super(DeepKeyError, self).__init__("{k!r} (from deep key {s!r})".format(k=key, s=keys))
+
+
+# Sentinel object for `default` argument of `deep_get`
+_deep_get_default_undefined = object()
+
+
+def deep_get(data: dict, *keys, default=_deep_get_default_undefined):
+    """
+    Get value deeply from nested dictionaries/lists/tuples
+
+    :param data: nested data structure of dicts, lists, tuples
+    :param keys: sequence of keys/indexes to traverse
+    :param default: default value when a key is missing.
+        By default a DeepKeyError will be raised.
+    :return:
+    """
+    for key in keys:
+        if isinstance(data, dict) and key in data:
+            data = data[key]
+        elif isinstance(data, (list, tuple)) and isinstance(key, int) and 0 <= key < len(data):
+            data = data[key]
+        else:
+            if default is _deep_get_default_undefined:
+                raise DeepKeyError(key, keys)
+            else:
+                return default
+    return data
+
+
+def deep_set(data: dict, *keys, value):
+    """
+    Set a value deeply in nested dictionary
+
+    :param data: nested data structure of dicts, lists, tuples
+    :param keys: sequence of keys/indexes to traverse
+    :param value: value to set
+    """
+    if len(keys) == 1:
+        data[keys[0]] = value
+    elif len(keys) > 1:
+        if isinstance(data, dict):
+            deep_set(data.setdefault(keys[0], OrderedDict()), *keys[1:], value=value)
+        elif isinstance(data, (list, tuple)):
+            deep_set(data[keys[0]], *keys[1:], value=value)
+        else:
+            ValueError(data)
+    else:
+        raise ValueError("No keys given")
+
+
+def guess_format(filename: Union[str, Path]) -> Union[str, None]:
+    """
+    Guess the output format from a given filename and return the corrected format.
+    Any names not in the dict get passed through.
+    """
+    extension = Path(filename).suffix
+    if not extension:
+        return None
+    extension = extension[1:].lower()
+
+    format_map = {
+        "gtiff": "GTiff",
+        "geotiff": "GTiff",
+        "geotif": "GTiff",
+        "tiff": "GTiff",
+        "tif": "GTiff",
+        "nc": "netCDF",
+        "netcdf": "netCDF",
+        "geojson": "GeoJSON",
+    }
+
+    return format_map.get(extension, extension.upper())
+
+
+def load_json(path: Union[Path, str]) -> dict:
+    with Path(path).open("r", encoding="utf-8") as f:
+        return json.load(f)
+
+
+
+[docs] +def load_json_resource(src: Union[str, Path]) -> dict: + """ + Helper to load some kind of JSON resource + + :param src: a JSON resource: a raw JSON string, + a path to (local) JSON file, or a URL to a remote JSON resource + :return: data structured parsed from JSON + """ + if isinstance(src, str) and src.strip().startswith("{"): + # Assume source is a raw JSON string + return json.loads(src) + elif isinstance(src, str) and re.match(r"^https?://", src, flags=re.I): + # URL to remote JSON resource + return requests.get(src).json() + elif isinstance(src, Path) or (isinstance(src, str) and src.endswith(".json")): + # Assume source is a local JSON file path + return load_json(src) + raise ValueError(src)
+ + + +class LazyLoadCache: + """Simple cache that allows to (lazy) load on cache miss.""" + + def __init__(self): + self._cache = {} + + def get(self, key: Union[str, tuple], load: Callable[[], Any]): + if key not in self._cache: + self._cache[key] = load() + return self._cache[key] + + +def str_truncate(text: str, width: int = 64, ellipsis: str = "...") -> str: + """Shorten a string (with an ellipsis) if it is longer than certain length.""" + width = max(0, int(width)) + if len(text) <= width: + return text + if len(ellipsis) > width: + ellipsis = ellipsis[:width] + return text[:max(0, (width - len(ellipsis)))] + ellipsis + + +def repr_truncate(obj: Any, width: int = 64, ellipsis: str = "...") -> str: + """Do `repr` rendering of an object, but truncate string if it is too long .""" + if isinstance(obj, str) and width > len(ellipsis) + 2: + # Special case: put ellipsis inside quotes + return repr(str_truncate(text=obj, width=width - 2, ellipsis=ellipsis)) + else: + # General case: just put ellipsis at end + return str_truncate(text=repr(obj), width=width, ellipsis=ellipsis) + + +def in_interactive_mode() -> bool: + """Detect if we are running in interactive mode (Jupyter/IPython/repl)""" + # Based on https://stackoverflow.com/a/64523765 + return hasattr(sys, "ps1") + + +class InvalidBBoxException(ValueError): + pass + + +
+[docs] +class BBoxDict(dict): + """ + Dictionary based helper to easily create/work with bounding box dictionaries + (having keys "west", "south", "east", "north", and optionally "crs"). + + :param crs: value describing the coordinate reference system. + Typically just an int (interpreted as EPSG code, e.g. ``4326``) + or a string (handled as authority string, e.g. ``"EPSG:4326"``). + See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument. + + .. versionadded:: 0.10.1 + """ + + def __init__(self, *, west: float, south: float, east: float, north: float, crs: Optional[Union[str, int]] = None): + super().__init__(west=west, south=south, east=east, north=north) + if crs is not None: + self.update(crs=normalize_crs(crs)) + + # TODO: provide west, south, east, north, crs as @properties? Read-only or read-write? + + @classmethod + def from_any(cls, x: Any, *, crs: Optional[str] = None) -> BBoxDict: + if isinstance(x, dict): + if crs and "crs" in x and crs != x["crs"]: + raise InvalidBBoxException(f"Two CRS values specified: {crs} and {x['crs']}") + return cls.from_dict({"crs": crs, **x}) + elif isinstance(x, (list, tuple)): + return cls.from_sequence(x, crs=crs) + elif isinstance(x, shapely.geometry.base.BaseGeometry): + return cls.from_sequence(x.bounds, crs=crs) + # TODO: support other input? E.g.: WKT string, GeoJson-style dictionary (Polygon, FeatureCollection, ...) + else: + raise InvalidBBoxException(f"Can not construct BBoxDict from {x!r}") + +
+[docs] + @classmethod + def from_dict(cls, data: dict) -> BBoxDict: + """Build from dictionary with at least keys "west", "south", "east", and "north".""" + expected_fields = {"west", "south", "east", "north"} + # TODO: also support upper case fields? + # TODO: optional support for parameterized bbox fields? + missing = expected_fields.difference(data.keys()) + if missing: + raise InvalidBBoxException(f"Missing bbox fields {sorted(missing)}") + invalid = {k: data[k] for k in expected_fields if not isinstance(data[k], (int, float))} + if invalid: + raise InvalidBBoxException(f"Non-numerical bbox fields {invalid}.") + return cls(west=data["west"], south=data["south"], east=data["east"], north=data["north"], crs=data.get("crs"))
+ + +
+[docs] + @classmethod + def from_sequence(cls, seq: Union[list, tuple], crs: Optional[str] = None) -> BBoxDict: + """Build from sequence of 4 bounds (west, south, east and north).""" + if len(seq) != 4: + raise InvalidBBoxException(f"Expected sequence with 4 items, but got {len(seq)}.") + return cls(west=seq[0], south=seq[1], east=seq[2], north=seq[3], crs=crs)
+
+ + + +
+[docs] +def to_bbox_dict(x: Any, *, crs: Optional[Union[str, int]] = None) -> BBoxDict: + """ + Convert given data or object to a bounding box dictionary + (having keys "west", "south", "east", "north", and optionally "crs"). + + Supports various input types/formats: + + - list/tuple (assumed to be in west-south-east-north order) + + >>> to_bbox_dict([3, 50, 4, 51]) + {'west': 3, 'south': 50, 'east': 4, 'north': 51} + + - dictionary (unnecessary items will be stripped) + + >>> to_bbox_dict({ + ... "color": "red", "shape": "triangle", + ... "west": 1, "south": 2, "east": 3, "north": 4, "crs": "EPSG:4326", + ... }) + {'west': 1, 'south': 2, 'east': 3, 'north': 4, 'crs': 'EPSG:4326'} + + - a shapely geometry + + .. versionadded:: 0.10.1 + + :param x: input data that describes west-south-east-north bounds in some way, e.g. as a dictionary, + a list, a tuple, ashapely geometry, ... + :param crs: (optional) CRS field + :return: dictionary (subclass) with keys "west", "south", "east", "north", and optionally "crs". + """ + return BBoxDict.from_any(x=x, crs=crs)
+ + + +def url_join(root_url: str, path: str): + """Join a base url and sub path properly.""" + return urljoin(root_url.rstrip("/") + "/", path.lstrip("/")) + + +def clip(x: float, min: float, max: float) -> float: + """Clip given value between minimum and maximum value""" + return min if x < min else (x if x < max else max) + + +class SimpleProgressBar: + """Simple ASCII-based progress bar helper.""" + + __slots__ = ["width", "bar", "fill", "left", "right"] + + def __init__(self, width: int = 40, *, bar: str = "#", fill: str = "-", left: str = "[", right: str = "]"): + self.width = int(width) + self.bar = bar[0] + self.fill = fill[0] + self.left = left + self.right = right + + def get(self, fraction: float) -> str: + width = self.width - len(self.left) - len(self.right) + bar = self.bar * int(round(width * clip(fraction, min=0, max=1))) + return f"{self.left}{bar:{self.fill}<{width}s}{self.right}" + + +
+[docs] +def normalize_crs(crs: Any, *, use_pyproj: bool = True) -> Union[None, int, str]: + """ + Normalize the given value (describing a CRS or Coordinate Reference System) + to an openEO compatible EPSG code (int) or WKT2 CRS string. + + At minimum, the following input values are handled: + + - an integer value (e.g. ``4326``) is interpreted as an EPSG code + - a string that just contains an integer (e.g. ``"4326"``) + or with and additional ``"EPSG:"`` prefix (e.g. ``"EPSG:4326"``) + will also be interpreted as an EPSG value + + Additional support and behavior depends on the availability of the ``pyproj`` library: + + - When available, it will be used for parsing and validation: + everything supported by `pyproj.CRS.from_user_input <https://pyproj4.github.io/pyproj/dev/api/crs/crs.html#pyproj.crs.CRS.from_user_input>`_ is allowed. + See the ``pyproj`` docs for more details. + - Otherwise, some best effort validation is done: + EPSG looking integer or string values will be parsed as such as discussed above. + Other strings will be assumed to be WKT2 already. + Other data structures will not be accepted. + + :param crs: value that encodes a coordinate reference system, typically just an int (EPSG code) or string (authority string). + If the ``pyproj`` library is available, everything supported by it is allowed. + + :param use_pyproj: whether ``pyproj`` should be leveraged at all + (mainly useful for testing the "no pyproj available" code path) + + :return: EPSG code as int, or WKT2 string. Or None if input was empty. + + :raises ValueError: + When the given CRS data can not be parsed/converted/normalized. 
+ + """ + if crs in (None, "", {}): + return None + + if pyproj and use_pyproj: + try: + # (if available:) let pyproj do the validation/parsing + crs_obj = pyproj.CRS.from_user_input(crs) + # Convert back to EPSG int or WKT2 string + crs = crs_obj.to_epsg() or crs_obj.to_wkt() + except pyproj.ProjError as e: + raise ValueError(f"Failed to normalize CRS data with pyproj: {crs!r}") from e + else: + # Best effort simple validation/normalization + if isinstance(crs, int) and crs > 0: + # Assume int is already valid EPSG code + pass + elif isinstance(crs, str): + # Parse as EPSG int code if it looks like that, + # otherwise: leave it as-is, assuming it is a valid WKT2 CRS string + if re.match(r"^(epsg:)?\d+$", crs.strip(), flags=re.IGNORECASE): + crs = int(crs.split(":")[-1]) + elif "GEOGCRS[" in crs: + # Very simple WKT2 CRS detection heuristic + logger.warning(f"Assuming this is a valid WK2 CRS string: {repr_truncate(crs)}") + else: + raise ValueError(f"Can not normalize CRS string {repr_truncate(crs)}") + else: + raise ValueError(f"Can not normalize CRS data {type(crs)}") + + return crs
+ +
+ +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/_sources/api-processbuilder.rst.txt b/_sources/api-processbuilder.rst.txt new file mode 100644 index 000000000..7ebdca75c --- /dev/null +++ b/_sources/api-processbuilder.rst.txt @@ -0,0 +1,87 @@ +.. FYI this file is intended to be inlined (with "include" RST directive) + in the ProcessBuilder class doc block, + which in turn is covered with autodoc/automodule from api-processes.rst. + + +The :py:class:`ProcessBuilder ` class +is a helper class that implements +(much like the :ref:`openEO process functions `) +each openEO process as a method. +On top of that it also adds syntactic sugar to support Python operators as well +(e.g. ``+`` is translated to the ``add`` process). + +.. attention:: + As normal user, you should never create a + :py:class:`ProcessBuilder ` instance + directly. + + You should only interact with this class inside a callback + function/lambda while building a child callback process graph + as discussed at :ref:`child_callback_callable`. + + +For example, let's start from this simple usage snippet +where we want to reduce the temporal dimension +by taking the temporal mean of each timeseries: + +.. code-block:: python + + def my_reducer(data): + return data.mean() + + cube.reduce_dimension(reducer=my_reducer, dimension="t") + +Note that this ``my_reducer`` function has a ``data`` argument, +which conceptually corresponds to an array of pixel values +(along the temporal dimension). +However, it's important to understand that the ``my_reducer`` function +is actually *not evaluated when you execute your process graph* +on an openEO back-end, e.g. as a batch jobs. +Instead, ``my_reducer`` is evaluated +*while building your process graph client-side* +(at the time you execute that ``cube.reduce_dimension()`` statement to be precise). 
+This means that the ``data`` argument is actually not a concrete array of EO data, +but some kind of *virtual placeholder*, +a :py:class:`ProcessBuilder <openeo.processes.ProcessBuilder>` instance, +that keeps track of the operations you intend to do on the EO data. + +To make that more concrete, it helps to add type hints +which will make it easier to discover what you can do with the argument +(depending on which editor or IDE you are using): + +.. code-block:: python + + from openeo.processes import ProcessBuilder + + def my_reducer(data: ProcessBuilder) -> ProcessBuilder: + return data.mean() + + cube.reduce_dimension(reducer=my_reducer, dimension="t") + + +Because :py:class:`ProcessBuilder <openeo.processes.ProcessBuilder>` methods +return new :py:class:`ProcessBuilder <openeo.processes.ProcessBuilder>` instances, +and because it supports syntactic sugar to use Python operators on it, +and because :ref:`openeo.processes functions <openeo_processes_functions>` +also accept and return :py:class:`ProcessBuilder <openeo.processes.ProcessBuilder>` instances, +we can mix methods, functions and operators in the callback function like this: + +.. code-block:: python + + from openeo.processes import ProcessBuilder, cos + + def my_reducer(data: ProcessBuilder) -> ProcessBuilder: + return cos(data.mean()) + 1.23 + + cube.reduce_dimension(reducer=my_reducer, dimension="t") + +or compactly, using an anonymous lambda expression: + +.. 
code-block:: python + + from openeo.processes import cos + + cube.reduce_dimension( + reducer=lambda data: cos(data.mean()) + 1.23, + dimension="t" + ) diff --git a/_sources/api-processes.rst.txt b/_sources/api-processes.rst.txt new file mode 100644 index 000000000..b52384e35 --- /dev/null +++ b/_sources/api-processes.rst.txt @@ -0,0 +1,68 @@ +========================= +API: ``openeo.processes`` +========================= + +The ``openeo.processes`` module contains building blocks and helpers +to construct so called "child callbacks" for openEO processes like +:py:meth:`openeo.rest.datacube.DataCube.apply` and +:py:meth:`openeo.rest.datacube.DataCube.reduce_dimension`, +as discussed at :ref:`child_callback_callable`. + +.. note:: + The contents of the ``openeo.processes`` module is automatically compiled + from the official openEO process specifications. + Developers that want to fix bugs in, or add implementations to this + module should not touch the file directly, but instead address it in the + upstream `openeo-processes `_ repository + or in the internal tooling to generate this file. + + +.. contents:: Sections: + :depth: 1 + :local: + :backlinks: top + + +.. _openeo_processes_functions: + +Functions in ``openeo.processes`` +--------------------------------- + +The ``openeo.processes`` module implements (at top-level) +a regular Python function for each openEO process +(not only the official stable ones, but also experimental ones in "proposal" state). + +These functions can be used directly as child callback, +for example as follows: + +.. 
code-block:: python + + from openeo.processes import absolute, max + + cube.apply(absolute) + cube.reduce_dimension(max, dimension="t") + + +Note how the signatures of the parent :py:class:`DataCube ` methods +and the callback functions match up: + +- :py:meth:`DataCube.apply() ` + expects a callback that receives a single numerical value, + which corresponds to the parameter signature of :py:func:`openeo.processes.absolute` +- :py:meth:`DataCube.reduce_dimension() ` + expects a callback that receives an array of numerical values, + which corresponds to the parameter signature :py:func:`openeo.processes.max` + + +.. automodule:: openeo.processes + :members: + :exclude-members: ProcessBuilder, process, _process + + +``ProcessBuilder`` helper class +-------------------------------- + +.. FYI the ProcessBuilder docs are provided through its doc block + with an RST "include" of "api-processbuilder.rst" + +.. autoclass:: openeo.processes.ProcessBuilder diff --git a/_sources/api.rst.txt b/_sources/api.rst.txt new file mode 100644 index 000000000..a20747cc4 --- /dev/null +++ b/_sources/api.rst.txt @@ -0,0 +1,177 @@ +============= +API (General) +============= + +High level Interface +-------------------- + +The high-level interface tries to provide an opinionated, Pythonic, API +to interact with openEO back-ends. It's aim is to hide some of the details +of using a web service, so the user can produce concise and readable code. + +Users that want to interact with openEO on a lower level, and have more control, can +use the lower level classes. + + +openeo +-------- + +.. autofunction:: openeo.connect + + +openeo.rest.datacube +----------------------- + +.. automodule:: openeo.rest.datacube + :members: DataCube + :inherited-members: + :special-members: __init__ + +.. automodule:: openeo.rest._datacube + :members: UDF + + +openeo.rest.vectorcube +------------------------ + +.. 
automodule:: openeo.rest.vectorcube + :members: VectorCube + :inherited-members: + + +openeo.rest.mlmodel +--------------------- + +.. automodule:: openeo.rest.mlmodel + :members: MlModel + :inherited-members: + + +openeo.rest.multiresult +----------------------- + +.. automodule:: openeo.rest.multiresult + :members: MultiResult + :inherited-members: + :special-members: __init__ + + +openeo.metadata +---------------- + +.. automodule:: openeo.metadata + :members: CollectionMetadata, BandDimension, SpatialDimension, TemporalDimension + + +openeo.api.process +-------------------- + +.. automodule:: openeo.api.process + :members: Parameter + + +openeo.api.logs +----------------- + +.. automodule:: openeo.api.logs + :members: LogEntry, normalize_log_level + + +openeo.rest.connection +---------------------- + +.. automodule:: openeo.rest.connection + :members: Connection + + +openeo.rest.job +------------------ + +.. automodule:: openeo.rest.job + :members: BatchJob, RESTJob, JobResults, ResultAsset + + +openeo.rest.conversions +------------------------- + +.. automodule:: openeo.rest.conversions + :members: + + +openeo.rest.udp +----------------- + +.. automodule:: openeo.rest.udp + :members: RESTUserDefinedProcess, build_process_dict + + +openeo.rest.userfile +---------------------- + +.. automodule:: openeo.rest.userfile + :members: + + +openeo.udf +------------- + +.. automodule:: openeo.udf.udf_data + :members: UdfData + +.. automodule:: openeo.udf.xarraydatacube + :members: XarrayDataCube + +.. automodule:: openeo.udf.structured_data + :members: StructuredData + +.. automodule:: openeo.udf.run_code + :members: execute_local_udf, extract_udf_dependencies + +.. automodule:: openeo.udf.debug + :members: inspect + + +openeo.util +------------- + +.. automodule:: openeo.util + :members: to_bbox_dict, BBoxDict, load_json_resource, normalize_crs + + +openeo.processes +---------------- + +.. 
Note that only openeo.processes.process is included here + the rest of openeo.processes is included from api-processes.rst + +.. autofunction:: openeo.processes.process + + +Graph building +---------------- + +Various utilities and helpers to simplify the construction of openEO process graphs. + +.. automodule:: openeo.rest.graph_building + :members: collection_property, CollectionProperty + +.. automodule:: openeo.internal.graph_building + :members: PGNode, FlatGraphableMixin + + +Testing +-------- + +Various utilities for testing use cases (unit tests, integration tests, benchmarking, ...) + +openeo.testing +`````````````` + +.. automodule:: openeo.testing + :members: + +openeo.testing.results +`````````````````````` + +.. automodule:: openeo.testing.results + :members: diff --git a/_sources/auth.rst.txt b/_sources/auth.rst.txt new file mode 100644 index 000000000..dfc8a47e9 --- /dev/null +++ b/_sources/auth.rst.txt @@ -0,0 +1,611 @@ +.. _authentication_chapter: + +************************************* +Authentication and Account Management +************************************* + + +While a couple of openEO operations can be done +anonymously, most of the interesting parts +of the API require you to identify as a registered +user. +The openEO API specifies two ways to authenticate +as a user: + +* OpenID Connect (recommended, but not always straightforward to use) +* Basic HTTP Authentication (not recommended, but practically easier in some situations) + +To illustrate how to authenticate with the openEO Python Client Library, +we start form a back-end connection:: + + import openeo + + connection = openeo.connect("https://openeo.example.com") + +Basic HTTP Auth +=============== + +Let's start with the easiest authentication method, +based on the Basic HTTP authentication scheme. +It is however *not recommended* for various reasons, +such as its limited *security* measures. 
+For example, if you are connecting to a back-end with a ``http://`` URL +instead of a ``https://`` one, you should certainly not use basic HTTP auth. + +With these security related caveats out of the way, you authenticate +using your username and password like this:: + + connection.authenticate_basic("john", "j0hn123") + +Subsequent usage of the connection object ``connection`` will +use authenticated calls. +For example, show information about the authenticated user:: + + >>> connection.describe_account() + {'user_id': 'john'} + + + +OpenID Connect Based Authentication +=================================== + +OpenID Connect (often abbreviated "OIDC") is an identity layer on top of the OAuth 2.0 protocol. +An in-depth discussion of the whole architecture would lead us too far here, +but some central OpenID Connect concepts are quite useful to understand +in the context of working with openEO: + +* There is **decoupling** between: + + * the *OpenID Connect identity provider* + which handles the authentication/authorization and stores user information + (e.g. an organization Google, Github, Microsoft, your academic/research institution, ...) + * the *openEO back-end* which manages earth observation collections + and executes your algorithms + + Instead of managing the authentication procedure itself, + an openEO back-end forwards a user to the relevant OpenID Connect provider to authenticate + and request access to basic profile information (e.g. email address). + On return, when the user allowed this access, + the openEO back-end receives the profile information and uses this to identify the user. + + Note that with this approach, the back-end does not have to + take care of all the security and privacy challenges + of properly handling user registration, passwords/authentication, etc. 
+ Also, it allows the user to securely reuse an existing account + registered with an established organisation, instead of having + to register yet another account with some web service. + +* Your openEO script or application acts as + a so called **OpenID Connect client**, with an associated **client id**. + In most cases, a default client (id) defined by the openEO back-end will be used automatically. + For some applications a custom client might be necessary, + but this is out of scope of this documentation. + +* OpenID Connect authentication can be done with different kinds of "**flows**" (also called "grants") + and picking the right flow depends on your specific use case. + The most common OIDC flows using the openEO Python Client Library are: + + * :ref:`authenticate_oidc_device` + * :ref:`authenticate_oidc_client_credentials` + * :ref:`authenticate_oidc_refresh_token` + + +OpenID Connect is clearly more complex than Basic HTTP Auth. +In the sections below we will discuss the practical details of each flow. + +General options +--------------- + +* A back-end might support **multiple OpenID Connect providers**. + The openEO Python Client Library will pick the first one by default, + but another provider can be specified explicitly with the ``provider_id`` argument, e.g.: + + .. code-block:: python + + connection.authenticate_oidc_device( + provider_id="gl", + ... + ) + + + +.. _authenticate_oidc_device: + +OIDC Authentication: Device Code Flow +====================================== + +The device code flow (also called device authorization grant) +is an interactive flow that requires a web browser for the authentication +with the OpenID Connect provider. +The nice thing is that the browser doesn't have to run on +the same system or network as where you run your application, +you could even use a browser on your mobile phone. + +Use :py:meth:`~openeo.rest.connection.Connection.authenticate_oidc_device` to initiate the flow: + +.. 
code-block:: python + + connection.authenticate_oidc_device() + +This will print a message like this: + +.. code-block:: text + + Visit https://oidc.example.net/device + and enter user code 'DTNY-KLNX' to authenticate. + +Some OpenID Connect Providers use a slightly longer URL that already includes +the user code, and then you don't need to enter the user code in one of the next steps: + +.. code-block:: text + + Visit https://oidc.example.net/device?user_code=DTNY-KLNX to authenticate. + +You should now visit this URL in your browser of choice. +Usually, it is intentionally a short URL to make it feasible to type it +instead of copy-pasting it (e.g. on another device). + +Authenticate with the OpenID Connect provider and, if requested, enter the user code +shown in the message. +When the URL already contains the user code, the page won't ask for this code. + +Meanwhile, the openEO Python Client Library is actively polling the OpenID Connect +provider and when you successfully complete the authentication, +it will receive the necessary tokens for authenticated communication +with the back-end and print: + +.. code-block:: text + + Authorized successfully. + +In case of authentication failure, the openEO Python Client Library +will stop polling at some point and raise an exception. + + + + +.. _authenticate_oidc_refresh_token: + +OIDC Authentication: Refresh Token Flow +======================================== + +When OpenID Connect authentication completes successfully, +the openEO Python Client Library receives an access token +to be used when doing authenticated calls to the back-end. +The access token usually has a short lifetime to reduce +the security risk in case it is stolen or intercepted. +The openEO Python Client Library also receives a *refresh token* +that can be used, through the Refresh Token flow, +to easily request a new access token, +without having to re-authenticate, +which makes it useful for **non-interactive use cases**.
+ + +However, as it needs an existing refresh token, +the Refresh Token Flow requires +**first to authenticate with one of the other flows** +(but in practice this should not be done very often +because refresh tokens usually have a relatively long lifetime). +When doing the initial authentication, +you have to explicitly enable storage of the refresh token, +through the ``store_refresh_token`` argument, e.g.: + +.. code-block:: python + + connection.authenticate_oidc_device( + ... + store_refresh_token=True + ) + + +The refresh token will be stored in a private file +in your home directory and will be used automatically +when authenticating with the Refresh Token Flow, +using :py:meth:`~openeo.rest.connection.Connection.authenticate_oidc_refresh_token`: + +.. code-block:: python + + connection.authenticate_oidc_refresh_token() + +You can also bootstrap the refresh token file +as described in :ref:`oidc_auth_get_refresh_token`. + + + +.. _authenticate_oidc_client_credentials: + +OIDC Authentication: Client Credentials Flow +============================================= + +The OIDC Client Credentials flow does not involve interactive authentication (e.g. through a web browser), +which makes it a useful option for **non-interactive use cases**. + +.. important:: + This method requires a custom **OIDC client id** and **client secret**. + It is out of scope of this general documentation to explain + how to obtain these as it depends on the openEO back-end you are using + and the OIDC provider that is in play. + + Also, your openEO back-end might not allow it, because technically + you are authenticating a *client* instead of a *user*. + + Consult the support of the openEO back-end you want to use for more information. + +In its most simple form, given your client id and secret, +you can authenticate with +:py:meth:`~openeo.rest.connection.Connection.authenticate_oidc_client_credentials` +as follows: + +..
code-block:: python + + connection.authenticate_oidc_client_credentials( + client_id=client_id, + client_secret=client_secret, + ) + +You might also have to pass a custom provider id (argument ``provider_id``) +if your OIDC client is associated with an OIDC provider that is different from the default provider. + +.. caution:: + Make sure to *keep the client secret a secret* and avoid putting it directly in your source code + or, worse, committing it to a version control system. + Instead, fetch the secret from a protected source (e.g. a protected file, a database for sensitive data, ...) + or from environment variables. + +.. _authenticate_oidc_client_credentials_env_vars: + +OIDC Client Credentials Using Environment Variables +---------------------------------------------------- + +Since version 0.18.0, the openEO Python Client Library has built-in support to get the client id, +secret (and provider id) from environment variables +``OPENEO_AUTH_CLIENT_ID``, ``OPENEO_AUTH_CLIENT_SECRET`` and ``OPENEO_AUTH_PROVIDER_ID`` respectively. +Just call :py:meth:`~openeo.rest.connection.Connection.authenticate_oidc_client_credentials` +without arguments. + +Usage example assuming a Linux (Bash) shell context: + +.. code-block:: console + + $ export OPENEO_AUTH_CLIENT_ID="my-client-id" + $ export OPENEO_AUTH_CLIENT_SECRET="Cl13n7S3cr3t!?123" + $ export OPENEO_AUTH_PROVIDER_ID="oidcprovider" + $ python + >>> import openeo + >>> connection = openeo.connect("openeo.example.com") + >>> connection.authenticate_oidc_client_credentials() + + + + +.. _authenticate_oidc_automatic: + +OIDC Authentication: Dynamic Method Selection +============================================== + +The sections above discuss various authentication options, like +the :ref:`device code flow <authenticate_oidc_device>`, +:ref:`refresh tokens <authenticate_oidc_refresh_token>` and +:ref:`client credentials flow <authenticate_oidc_client_credentials>`, +but often you want to *dynamically* switch between these depending on the situation: +e.g.
use a refresh token if you have an active one, and fall back on the device code flow otherwise. +Or you want to be able to run the same code in an interactive environment and automated in an unattended manner, +without having to switch authentication methods explicitly in code. + +That is what :py:meth:`Connection.authenticate_oidc() <openeo.rest.connection.Connection.authenticate_oidc>` is for: + +.. code-block:: python + + connection.authenticate_oidc() # is all you need + +In a basic situation (without any particular environment variables set as discussed further), +this method will first try to authenticate with refresh tokens (if any) +and fall back on the device code flow otherwise. +Ideally, when valid refresh tokens are available, this works without interaction, +but occasionally, when the refresh tokens expire, one has to do the interactive device code flow. + +Since version 0.18.0, the openEO Python Client Library also allows you to trigger the +:ref:`client credentials flow <authenticate_oidc_client_credentials>` +from :py:meth:`~openeo.rest.connection.Connection.authenticate_oidc` +by setting environment variable ``OPENEO_AUTH_METHOD`` +and the other :ref:`client credentials environment variables <authenticate_oidc_client_credentials_env_vars>`. +For example: + +.. code-block:: shell + + $ export OPENEO_AUTH_METHOD="client_credentials" + $ export OPENEO_AUTH_CLIENT_ID="my-client-id" + $ export OPENEO_AUTH_CLIENT_SECRET="Cl13n7S3cr3t!?123" + $ export OPENEO_AUTH_PROVIDER_ID="oidcprovider" + $ python + >>> import openeo + >>> connection = openeo.connect("openeo.example.com") + >>> connection.authenticate_oidc() + + + + + + + + + +.. _auth_configuration_files: + +Auth config files and ``openeo-auth`` helper tool +==================================================== + +The openEO Python Client Library provides some features and tools +that ease the usability and security challenges +that come with authentication (especially in case of OpenID Connect). + +Note that the code examples above contain quite a few **passwords and other secrets** +that should be kept safe from prying eyes. 
+It is bad practice to define these kinds of secrets directly +in your scripts and source code because that makes it quite hard +to responsibly share or reuse your code. +Even worse is storing these secrets in your version control system, +where it might be near impossible to remove them again. +A better solution is to keep **secrets in separate configuration or cache files**, +outside of your normal source code tree +(to avoid committing them accidentally). + + +The openEO Python Client Library supports config files to store: +user names, passwords, client IDs, client secrets, etc, +so you don't always have to specify them in your scripts and applications. + +The openEO Python Client Library (when installed properly) +provides a command line tool ``openeo-auth`` to bootstrap and manage +these configs and secrets. +It is a command line tool that provides various "subcommands" +and has built-in help:: + + $ openeo-auth -h + usage: openeo-auth [-h] [--verbose] + {paths,config-dump,token-dump,add-basic,add-oidc,oidc-auth} + ... + + Tool to manage openEO related authentication and configuration. + + optional arguments: + -h, --help show this help message and exit + + Subcommands: + {paths,config-dump,token-dump,add-basic,add-oidc,oidc-auth} + paths Show paths to config/token files. + config-dump Dump config file. + ... 
+ + + +For example, to see the expected paths of the config files:: + + $ openeo-auth paths + openEO auth config: /home/john/.config/openeo-python-client/auth-config.json (perms: 0o600, size: 1414B) + openEO OpenID Connect refresh token store: /home/john/.local/share/openeo-python-client/refresh-tokens.json (perms: 0o600, size: 846B) + + +With the ``config-dump`` and ``token-dump`` subcommands you can dump +the current configuration and stored refresh tokens, e.g.:: + + $ openeo-auth config-dump + ### /home/john/.config/openeo-python-client/auth-config.json ############### + { + "backends": { + "https://openeo.example.com": { + "basic": { + "username": "john", + "password": "", + "date": "2020-07-24T13:40:50Z" + ... + +Sensitive information (like passwords) is redacted by default. + + + +Basic HTTP Auth config +----------------------- + +With the ``add-basic`` subcommand you can add Basic HTTP Auth credentials +for a given back-end to the config. +It will interactively ask for username and password and +check whether these credentials work:: + + $ openeo-auth add-basic https://openeo.example.com/ + Enter username and press enter: john + Enter password and press enter: + Trying to authenticate with 'https://openeo.example.com' + Successfully authenticated 'john' + Saved credentials to '/home/john/.config/openeo-python-client/auth-config.json' + +Now you can authenticate in your application without having to +specify username and password explicitly:: + + connection.authenticate_basic() + +OpenID Connect configs +----------------------- + +Likewise, with the ``add-oidc`` subcommand you can add OpenID Connect +credentials to the config:: + + $ openeo-auth add-oidc https://openeo.example.com/ + Using provider ID 'example' (issuer 'https://oidc.example.net/') + Enter client_id and press enter: client-d7393fba + Enter client_secret and press enter: + Saved client information to '/home/john/.config/openeo-python-client/auth-config.json' + +Now you can use OpenID Connect 
based authentication in your application +without having to specify the client ID and client secret explicitly, +like one of these calls:: + + connection.authenticate_oidc_authorization_code() + connection.authenticate_oidc_client_credentials() + connection.authenticate_oidc_resource_owner_password_credentials(username=username, password=password) + connection.authenticate_oidc_device() + connection.authenticate_oidc_refresh_token() + +Note that you still have to add additional options as required, like +``provider_id``, ``server_address``, ``store_refresh_token``, etc. + + +.. _oidc_auth_get_refresh_token: + +OpenID Connect refresh tokens +````````````````````````````` + +There is also an ``oidc-auth`` subcommand to execute an OpenID Connect +authentication flow and store the resulting refresh token. +This is intended for bootstrapping the environment or system +on which you want to run openEO scripts or applications that use +the Refresh Token Flow for authentication. +For example:: + + $ openeo-auth oidc-auth https://openeo.example.com + Using config '/home/john/.config/openeo-python-client/auth-config.json'. + Starting OpenID Connect device flow. + To authenticate: visit https://oidc.example.net/device and enter the user code 'Q7ZNsy'. + Authorized successfully. + The OpenID Connect device flow was successful. + Stored refresh token in '/home/john/.local/share/openeo-python-client/refresh-tokens.json' + + + +.. _default_url_and_auto_auth: + +Default openEO back-end URL and auto-authentication +===================================================== + +.. versionadded:: 0.10.0 + + +If you often use the same openEO back-end URL and authentication scheme, +it can be handy to put these in a configuration file as discussed at :ref:`configuration_files`. + +.. note:: + Note that :ref:`these general configuration files <configuration_files>` are different + from the auth config files discussed earlier under :ref:`auth_configuration_files`. 
+ The latter are for storing authentication related secrets + and are mostly managed automatically (e.g. by the ``openeo-auth`` helper tool). + The former are not for storing secrets and are usually edited manually. + +For example, to define a default back-end and automatically use OpenID Connect authentication, +add these configuration options to the :ref:`desired configuration file `:: + + [Connection] + default_backend = openeo.cloud + default_backend.auto_authenticate = oidc + +Getting an authenticated connection is now as simple as:: + + >>> import openeo + >>> connection = openeo.connect() + Loaded openEO client config from openeo-client-config.ini + Using default back-end URL 'openeo.cloud' (from config) + Doing auto-authentication 'oidc' (from config) + Authenticated using refresh token. + + +Authentication for long-running applications and non-interactive contexts +=========================================================================== + +With OpenID Connect authentication, the *access token* +(which is used in the authentication headers) +is typically short-lived (e.g. a couple of minutes or hours). +This practically means that an authenticated connection could expire and become unusable +before a **long-running script or application** finishes its whole workflow. +Luckily, OpenID Connect also includes usage of *refresh tokens*, +which have a much longer expiry and allow requesting a new access token +to re-authenticate the connection. +Since version 0.10.1, the openEO Python Client Library will automatically +attempt to re-authenticate a connection when access token expiry is detected +and valid refresh tokens are available. + +Likewise, refresh tokens can also be used for authentication in cases +where a script or application is **run automatically in the background on a regular basis** (daily, weekly, ...). +If there is a non-expired refresh token available, the script can authenticate +without user interaction. 
+ +Guidelines and tips +-------------------- + +Some guidelines to get long-term and non-interactive authentication working for your use case: + +- If you run a workflow periodically, but the interval between runs + is larger than the expiry time of the refresh token + (e.g. a monthly job, while the refresh token expires after, say, 10 days), + you could consider setting up a *custom OIDC client* with better suited + refresh token timeout. + The practical details of this heavily depend on the OIDC Identity Provider + in play and are out of scope of this discussion. +- Obtaining a refresh token requires manual/interactive authentication, + but once it is stored on the necessary machine(s) + in the refresh token store as discussed in :ref:`auth_configuration_files`, + no further manual interaction should be necessary + during the lifetime of the refresh token. + To do so, use one of the following methods: + + - Use the ``openeo-auth oidc-auth`` cli tool, for example to authenticate + for openeo back-end openeo.example.com:: + + $ openeo-auth oidc-auth openeo.example.com + ... + Stored refresh token in '/home/john/.local/share/openeo-python-client/refresh-tokens.json' + + + - Use a Python snippet to authenticate and store the refresh token:: + + import openeo + connection = openeo.connect("openeo.example.com") + connection.authenticate_oidc_device(store_refresh_token=True) + + + To verify that (and where) the refresh token is stored, use ``openeo-auth token-dump``:: + + $ openeo-auth token-dump + ### /home/john/.local/share/openeo-python-client/refresh-tokens.json ####### + { + "https://oidc.example.net": { + "default-client": { + "date": "2022-05-11T13:13:20Z", + "refresh_token": "" + }, + ... + + + +Best Practices and Troubleshooting Tips +======================================== + +.. warning:: + + Handle (OIDC) access and refresh tokens like secret, personal passwords. 
+ **Never share your access or refresh tokens** with other people, + publicly, or for user support reasons. + + +Clear the refresh token file +---------------------------- + +When you have authentication or permission issues and you suspect +that your (locally cached) refresh tokens are the culprit: +remove your refresh token file in one of the following ways: + +- Locate the file with the ``openeo-auth`` command line tool:: + + $ openeo-auth paths + ... + openEO OpenID Connect refresh token store: /home/john/.local/share/openeo-python-client/refresh-tokens.json (perms: 0o600, size: 846B) + + and remove it. + Or, if you know what you are doing: remove the desired section from this JSON file. + +- Remove it directly with the ``token-clear`` subcommand of the ``openeo-auth`` command line tool:: + + $ openeo-auth token-clear + +- Remove it with this Python snippet:: + + from openeo.rest.auth.config import RefreshTokenStore + RefreshTokenStore().remove() diff --git a/_sources/basics.rst.txt b/_sources/basics.rst.txt new file mode 100644 index 000000000..64674553d --- /dev/null +++ b/_sources/basics.rst.txt @@ -0,0 +1,459 @@ +================ +Getting Started +================ + + +Connect to an openEO back-end +============================== + +First, establish a connection to an openEO back-end, using its connection URL. +For example the VITO/Terrascope backend: + +.. code-block:: python + + import openeo + + connection = openeo.connect("openeo.vito.be") + +The resulting :py:class:`~openeo.rest.connection.Connection` object is your central gateway to + +- list data collections, available processes, file formats and other capabilities of the back-end +- start building your openEO algorithm from the desired data on the back-end +- execute and monitor (batch) jobs on the back-end +- etc. + +.. seealso:: + + Use the `openEO Hub `_ to explore different back-end options + and their capabilities in a web-based way. 
+ + +Collection discovery +===================== + +The Earth observation data (the input of your openEO jobs) is organised in +`so-called collections `_, +e.g. fundamental satellite collections like "Sentinel 1" or "Sentinel 2", +or preprocessed collections like "NDVI". + +You can programmatically list the collections that are available on a back-end +and their metadata using methods on the ``connection`` object we just created +(like :py:meth:`~openeo.rest.connection.Connection.list_collection_ids` +or :py:meth:`~openeo.rest.connection.Connection.describe_collection`): + +.. code-block:: pycon + + >>> # Get all collection ids + >>> connection.list_collection_ids() + ['SENTINEL1_GRD', 'SENTINEL2_L2A', ... + + >>> # Get metadata of a single collection + >>> connection.describe_collection("SENTINEL2_L2A") + {'id': 'SENTINEL2_L2A', 'title': 'Sentinel-2 top of canopy ...', 'stac_version': '0.9.0', ... + +Congrats, you just did your first real openEO queries to the openEO back-end +using the openEO Python client library. + +.. tip:: + The openEO Python client library comes with **Jupyter (notebook) integration** in a couple of places. + For example, put ``connection.describe_collection("SENTINEL2_L2A")`` (without ``print()``) + as the last statement in a notebook cell + and you'll get a nice graphical rendering of the collection metadata. + +.. seealso:: + + Find out more about data discovery, loading and filtering at :ref:`data_access_chapter`. + + +Authentication +============== + +In the code snippets above we did not need to log in as a user +since we just queried publicly available back-end information. +However, to run non-trivial processing queries one has to authenticate +so that permissions, resource usage, etc. can be managed properly. + +To handle authentication, openEO leverages `OpenID Connect (OIDC) `_. +It offers some interesting features (e.g. 
a user can securely reuse an existing account), +but is a fairly complex topic, discussed in more depth at :ref:`authentication_chapter`. + +The openEO Python client library tries to make authentication as streamlined as possible. +In most cases, for example, the following snippet is enough to obtain an authenticated connection: + +.. code-block:: python + + import openeo + + connection = openeo.connect("openeo.vito.be").authenticate_oidc() + +This statement will automatically reuse a previously authenticated session, when available. +Otherwise, e.g. the first time you do this, some user interaction is required +and it will print a web link and a short *user code*, for example: + +.. code-block:: + + To authenticate: visit https://aai.egi.eu/auth/realms/egi/device and enter the user code 'SLUO-BMUD'. + +Visit this web page in a browser, log in there with an existing account and enter the user code. +If everything goes well, the ``connection`` object in the script will be authenticated +and the back-end will be able to identify you in subsequent requests. + + + +.. _basic_example_evi_map_and_timeseries: + +Example use case: EVI map and timeseries +========================================= + +A common task in earth observation is to apply a formula to a number of spectral bands +in order to compute an 'index', such as NDVI, NDWI, EVI, ... +In this tutorial we'll go through a couple of steps to extract +EVI (enhanced vegetation index) values and timeseries, +and discuss some openEO concepts along the way. + + +Loading an initial data cube +============================= + +For calculating the EVI, we need the reflectance of the +red, blue and (near) infrared spectral components. +These spectral bands are part of the well-known Sentinel-2 data set +and are available on the current back-end under collection id ``SENTINEL2_L2A``. +We load an initial small spatio-temporal slice (a data cube) as follows: + +.. 
code-block:: python + + sentinel2_cube = connection.load_collection( + "SENTINEL2_L2A", + spatial_extent={"west": 5.14, "south": 51.17, "east": 5.17, "north": 51.19}, + temporal_extent = ["2021-02-01", "2021-04-30"], + bands=["B02", "B04", "B08"] + ) + +Note how we specify the region of interest, a time range and a set of bands to load. + +.. important:: + By filtering as early as possible (directly in :py:meth:`~openeo.rest.connection.Connection.load_collection` in this case), + we make sure the back-end only loads the data we are interested in + for better performance and keeping the processing costs low. + +.. seealso:: + See the chapter :ref:`data_access_chapter` for more details on data discovery, + general data loading (:ref:`data-loading-and-filtering`) and filtering + (e.g. :ref:`filtering-on-temporal-extent-section`). + + +The :py:meth:`~openeo.rest.connection.Connection.load_collection` method on the connection +object created a :py:class:`~openeo.rest.datacube.DataCube` object (variable ``sentinel2_cube``). +This :py:class:`~openeo.rest.datacube.DataCube` class of the openEO Python Client Library +provides loads of methods corresponding to various openEO processes, +e.g. for masking, filtering, aggregation, spectral index calculation, data fusion, etc. +In the next steps we will illustrate a couple of these. + + +.. important:: + It is important to highlight that we *did not load any real EO data* yet. + Instead we just created an abstract *client-side reference*, + encapsulating the collection id, the spatial extent, the temporal extent, etc. + The actual data loading will only happen at the back-end + once we explicitly trigger the execution of the data processing pipeline we are building. + + + +Band math +========= + +From this data cube, we can now select the individual bands +with the :py:meth:`DataCube.band() <openeo.rest.datacube.DataCube.band>` method +and rescale the digital number values to physical reflectances: + +.. 
code-block:: python + + blue = sentinel2_cube.band("B02") * 0.0001 + red = sentinel2_cube.band("B04") * 0.0001 + nir = sentinel2_cube.band("B08") * 0.0001 + +We now want to compute the enhanced vegetation index +and can do that directly with these band variables: + +.. code-block:: python + + evi_cube = 2.5 * (nir - red) / (nir + 6.0 * red - 7.5 * blue + 1.0) + +.. important:: + As noted before: while this looks like an actual calculation, + there is *no real data processing going on here*. + The ``evi_cube`` object at this point is just an abstract representation + of our algorithm under construction. + The mathematical operators we used here are *syntactic sugar* + for expressing this part of the algorithm in a very compact way. + + As an illustration of this, let's have a peek at the *JSON representation* + of our algorithm so far, the so-called *openEO process graph*: + + .. code-block:: text + + >>> print(evi_cube.to_json(indent=None)) + {"process_graph": {"loadcollection1": {"process_id": "load_collection", ... + ... "id": "SENTINEL2_L2A", "spatial_extent": {"west": 5.14, "south": ... + ... "multiply1": { ... "y": 0.0001}}, ... + ... "multiply3": { ... {"x": 2.5, "y": {"from_node": "subtract1"}}} ... + ... + + Note how the ``load_collection`` arguments, rescaling and EVI calculation aspects + can be deciphered from this. + Rest assured, as a user you normally don't have to worry too much + about these process graph details: + the openEO Python Client library handles this behind the scenes for you. + + +Download (synchronously) +======================== + +Let's download this as a GeoTIFF file. +Because GeoTIFF does not support a temporal dimension, +we first eliminate it by taking the temporal maximum value for each pixel: + +.. code-block:: python + + evi_composite = evi_cube.max_time() + +.. 
note:: + + This :py:meth:`~openeo.rest.datacube.DataCube.max_time()` is not an official openEO process + but one of the many *convenience methods* in the openEO Python Client Library + to simplify common processing patterns. + It implements a ``reduce`` operation along the temporal dimension + with a ``max`` reducer/aggregator. + +Now we can download this to a local file: + +.. code-block:: python + + evi_composite.download("evi-composite.tiff") + +This download command **triggers the actual processing** on the back-end: +it sends the process graph to the back-end and waits for the result. +It is a *synchronous operation* (the :py:meth:`~openeo.rest.datacube.DataCube.download()` call +blocks until the result is fully downloaded) and because we work on a small spatio-temporal extent, +this should only take a couple of seconds. + +If we inspect the downloaded image, we see that the maximum EVI value is heavily impacted +by cloud related artefacts, which makes the result barely usable. +In the next steps we will address cloud masking. + +.. image:: _static/images/basics/evi-composite.png + + +Batch Jobs (asynchronous execution) +=================================== + +Synchronous downloads are handy for quick experimentation on small data cubes, +but if you start processing larger data cubes, you can easily +hit *computation time limits* or other constraints. +For these larger tasks, it is recommended to work with **batch jobs**, +which allow you to work asynchronously: +after you start your job, you can disconnect (stop your script or even close your computer) +and then minutes/hours later you can reconnect to check the batch job status and download results. +The openEO Python Client Library also provides helpers to keep track of a running batch job +and show a progress report. + +.. seealso:: + + See :ref:`batch-jobs-chapter` for more details. 
+ + +Applying a cloud mask +========================= + +As mentioned above, we need to filter out cloud pixels to make the result more usable. +It is very common for earth observation data to have separate masking layers that for instance indicate +whether a pixel is covered by a (type of) cloud or not. +For Sentinel-2, one such layer is the "scene classification" layer generated by the Sen2Cor algorithm. +In this example, we will use this layer to mask out unwanted data. + +First, we load a new ``SENTINEL2_L2A`` based data cube with this specific ``SCL`` band as single band: + +.. code-block:: python + + s2_scl = connection.load_collection( + "SENTINEL2_L2A", + spatial_extent={"west": 5.14, "south": 51.17, "east": 5.17, "north": 51.19}, + temporal_extent = ["2021-02-01", "2021-04-30"], + bands=["SCL"] + ) + +Now we can use the compact "band math" feature again to build a +binary mask with a simple comparison operation: + +.. code-block:: python + + # Select the "SCL" band from the data cube + scl_band = s2_scl.band("SCL") + # Build mask to mask out everything but class 4 (vegetation) + mask = (scl_band != 4) + +Before we can apply this mask to the EVI cube we have to resample it, +as the "SCL" layer has a "ground sample distance" of 20 meter, +while it is 10 meter for the "B02", "B04" and "B08" bands. +We can easily do the resampling by referring directly to the EVI cube. + +.. code-block:: python + + mask_resampled = mask.resample_cube_spatial(evi_cube) + + # Apply the mask to the `evi_cube` + evi_cube_masked = evi_cube.mask(mask_resampled) + + +We can now download this as a GeoTIFF, again after taking the temporal maximum: + +.. code-block:: python + + evi_cube_masked.max_time().download("evi-masked-composite.tiff") + +Now, the EVI map is a lot more valuable, as the non-vegetation locations +and observations are filtered out: + +.. 
image:: _static/images/basics/evi-masked-composite.png + + +Aggregated EVI timeseries +=========================== + +A common type of analysis is aggregating pixel values over one or more regions of interest +(also known as "zonal statistics") and tracking this aggregation over a period of time as a timeseries. +Let's extract the EVI timeseries for these two regions: + +.. code-block:: python + + features = {"type": "FeatureCollection", "features": [ + { + "type": "Feature", "properties": {}, + "geometry": {"type": "Polygon", "coordinates": [[ + [5.1417, 51.1785], [5.1414, 51.1772], [5.1444, 51.1768], [5.1443, 51.179], [5.1417, 51.1785] + ]]} + }, + { + "type": "Feature", "properties": {}, + "geometry": {"type": "Polygon", "coordinates": [[ + [5.156, 51.1892], [5.155, 51.1855], [5.163, 51.1855], [5.163, 51.1891], [5.156, 51.1892] + ]]} + } + ]} + + +.. note:: + + To have a self-contained example we define the geometries here as an inline GeoJSON-style dictionary. + In a real use case, your geometry will probably come from a local file or remote URL. + The openEO Python Client Library supports alternative ways of specifying the geometry + in methods like :py:meth:`~openeo.rest.datacube.DataCube.aggregate_spatial()`, e.g. + as Shapely geometry objects. + + +Building on the experience from previous sections, we first build a masked EVI cube +(covering a longer time window than before): + +.. 
code-block:: python + + # Load raw collection data + sentinel2_cube = connection.load_collection( + "SENTINEL2_L2A", + spatial_extent={"west": 5.14, "south": 51.17, "east": 5.17, "north": 51.19}, + temporal_extent = ["2020-01-01", "2021-12-31"], + bands=["B02", "B04", "B08", "SCL"], + ) + + # Extract spectral bands and calculate EVI with the "band math" feature + blue = sentinel2_cube.band("B02") * 0.0001 + red = sentinel2_cube.band("B04") * 0.0001 + nir = sentinel2_cube.band("B08") * 0.0001 + evi = 2.5 * (nir - red) / (nir + 6.0 * red - 7.5 * blue + 1.0) + + # Use the scene classification layer to mask out non-vegetation pixels + scl = sentinel2_cube.band("SCL") + evi_masked = evi.mask(scl != 4) + +Now we use the :py:meth:`~openeo.rest.datacube.DataCube.aggregate_spatial()` method +to do spatial aggregation over the geometries we defined earlier. +Note how we can specify the aggregation function ``"mean"`` as a simple string for the ``reducer`` argument. + +.. code-block:: python + + evi_aggregation = evi_masked.aggregate_spatial( + geometries=features, + reducer="mean", + ) + +If we download this, we get the timeseries encoded as a JSON structure; other useful formats are CSV and netCDF. + +.. code-block:: python + + evi_aggregation.download("evi-aggregation.json") + +.. warning:: + + Technically, the output of the openEO process ``aggregate_spatial`` + is a so-called "vector cube". + At the time of this writing, the specification of this openEO concept + is not fully fleshed out yet in the openEO API. + openEO back-ends and clients try to provide best-effort support for it, + but bear in mind that some details are subject to change. + +The openEO Python Client Library provides helper functions +to convert the downloaded JSON data to a pandas dataframe, +which we massage a bit more: + +.. 
code-block:: python + + import json + import pandas as pd + from openeo.rest.conversions import timeseries_json_to_pandas + + import json + with open("evi-aggregation.json") as f: + data = json.load(f) + + df = timeseries_json_to_pandas(data) + df.index = pd.to_datetime(df.index) + df = df.dropna() + df.columns = ("Field A", "Field B") + +This gives us finally our EVI timeseries dataframe: + +.. code-block:: pycon + + >>> df + Field A Field B + date + 2020-01-06 00:00:00+00:00 0.522499 0.300250 + 2020-01-16 00:00:00+00:00 0.529591 0.288079 + 2020-01-18 00:00:00+00:00 0.633011 0.327598 + ... ... ... + + +.. image:: _static/images/basics/evi-timeseries.png + + +Computing multiple statistics +============================= + +.. warning:: + This is an experimental feature of the GeoPySpark openEO back-end, + it may not be supported by other back-ends, + and is subject to change. + See `Open-EO/openeo-geopyspark-driver#726 `_ for further discussion, + +The same method also allows the computation of multiple statistics at once. This does rely +on 'callbacks' to construct a result with multiple statistics. +The use of such more complex processes is further explained in :ref:`callbackfunctions`. + +.. code-block:: python + + from openeo.processes import array_create, mean, sd, median, count + + evi_aggregation = evi_masked.aggregate_spatial( + geometries=features, + reducer=lambda x: array_create([mean(x), sd(x), median(x), count(x)]), + ) diff --git a/_sources/batch_jobs.rst.txt b/_sources/batch_jobs.rst.txt new file mode 100644 index 000000000..85b9953f2 --- /dev/null +++ b/_sources/batch_jobs.rst.txt @@ -0,0 +1,415 @@ + +.. index:: + single: batch job + see: job; batch job + +.. _batch-jobs-chapter: + +============ +Batch Jobs +============ + +Most of the simple, basic openEO usage examples show **synchronous** downloading of results: +you submit a process graph with a (HTTP POST) request and receive the result +as direct response of that same request. 
+This only works properly if the processing doesn't take too long (order of seconds, or a couple of minutes at most). + +For the heavier work (larger regions of interest, larger time series, more intensive processing, ...) +you have to use **batch jobs**, which are supported in the openEO API through separate HTTP requests, corresponding to these steps: + +- you create a job (providing a process graph and some other metadata like title, description, ...) +- you start the job +- you wait for the job to finish, periodically polling its status +- when the job has finished successfully: get the listing of result assets +- you download the result assets (or use them in another way) + +.. tip:: + + This documentation mainly discusses how to **programmatically** + create and interact with batch jobs using the openEO Python client library. + The openEO API however does not enforce usage of the same tool + for each step in the batch job life cycle. + + For example: if you prefer a graphical, web-based **interactive environment** + to manage and monitor your batch jobs, + feel free to *switch to an openEO web editor* + like `editor.openeo.org <https://editor.openeo.org>`_ + or `editor.openeo.cloud <https://editor.openeo.cloud>`_ + at any time. + After logging in with the same account you use in your Python scripts, + you should see your batch jobs listed under the "Data Processing" tab: + + .. image:: _static/images/batchjobs-webeditor-listing.png + + With the "action" buttons on the right, you can for example + inspect batch job details, start/stop/delete jobs, + download their results, get batch job logs, etc. + + + +.. index:: batch job; create + +Create a batch job +=================== + +In the openEO Python Client Library, if you have a (raster) data cube, you can easily +create a batch job with the :py:meth:`DataCube.create_job() <openeo.rest.datacube.DataCube.create_job>` method. +It's important to specify in what *format* the result should be stored, +which can be done with an explicit :py:meth:`DataCube.save_result() <openeo.rest.datacube.DataCube.save_result>` call before creating the job: + +.. 
code-block:: python + + cube = connection.load_collection(...) + ... + # Store raster data as GeoTIFF files + cube = cube.save_result(format="GTiff") + job = cube.create_job() + +or directly in :py:meth:`cube.create_job() <openeo.rest.datacube.DataCube.create_job>`: + +.. code-block:: python + + cube = connection.load_collection(...) + ... + job = cube.create_job(out_format="GTiff") + +While not necessary, it is also recommended to give your batch job a descriptive title +so it's easier to identify in your job listing, e.g.: + +.. code-block:: python + + job = cube.create_job(title="NDVI timeseries 2022") + + + +.. index:: batch job; object + +Batch job object +================= + +The ``job`` object returned by :py:meth:`~openeo.rest.datacube.DataCube.create_job()` +is a :py:class:`~openeo.rest.job.BatchJob` object. +It is basically a *client-side reference* to a batch job that *exists on the back-end* +and allows you to interact with that batch job +(see the :py:class:`~openeo.rest.job.BatchJob` API docs for +available methods). + + +.. note:: + The :py:class:`~openeo.rest.job.BatchJob` class originally had + the more cryptic name :py:class:`~openeo.rest.job.RESTJob`, + which is still available as a legacy alias, + but :py:class:`~openeo.rest.job.BatchJob` is (available and) recommended since version 0.11.0. + + +A batch job on a back-end is fully identified by its +:py:data:`~openeo.rest.job.BatchJob.job_id`: + +.. code-block:: pycon + + >>> job.job_id + 'd5b8b8f2-74ce-4c2e-b06d-bff6f9b14b8d' + + +Reconnecting to a batch job +---------------------------- + +Depending on your situation or use case: +make sure to properly take note of the batch job id. +It allows you to "reconnect" to your job on the back-end, +even if it was created at another time, +by another script/notebook or even with another openEO client. + +Given a back-end connection and the batch job id, +use :py:meth:`Connection.job() <openeo.rest.connection.Connection.job>` +to create a :py:class:`~openeo.rest.job.BatchJob` object for an existing batch job: + +.. 
code-block:: python + + job_id = "5d806224-fe79-4a54-be04-90757893795b" + job = connection.job(job_id) + + +Jupyter integration +-------------------- + +:py:class:`~openeo.rest.job.BatchJob` objects have basic Jupyter notebook integration. +Put your :py:class:`~openeo.rest.job.BatchJob` object as last statement +in a notebook cell and you get an overview of your batch jobs, +including job id, status, title and even process graph visualization: + +.. image:: _static/images/batchjobs-jupyter-created.png + + +.. index:: batch job; listing + +List your batch jobs +======================== + +You can list your batch jobs on the back-end with +:py:meth:`Connection.list_jobs() `, which returns a list of job metadata: + +.. code-block:: pycon + + >>> connection.list_jobs() + [{'title': 'NDVI timeseries 2022', 'status': 'created', 'id': 'd5b8b8f2-74ce-4c2e-b06d-bff6f9b14b8d', 'created': '2022-06-08T08:58:11Z'}, + {'title': 'NDVI timeseries 2021', 'status': 'finished', 'id': '4e720e70-88bd-40bc-92db-a366985ebd67', 'created': '2022-06-04T14:46:06Z'}, + ... + +The listing returned by :py:meth:`Connection.list_jobs() ` +has Jupyter notebook integration: + +.. image:: _static/images/batchjobs-jupyter-listing.png + + +.. index:: batch job; start + +Run a batch job +================= + +Starting a batch job is pretty straightforward with the +:py:meth:`~openeo.rest.job.BatchJob.start()` method: + +.. code-block:: python + + job.start() + +If this didn't raise any errors or exceptions your job +should now have started (status "running") +or be queued for processing (status "queued"). + + + +.. index:: batch job; status + +Wait for a batch job to finish +-------------------------------- + +A batch job typically takes some time to finish, +and you can check its status with the :py:meth:`~openeo.rest.job.BatchJob.status()` method: + +.. 
code-block:: pycon + + >>> job.status() + "running" + +The possible batch job status values, defined by the openEO API, are +"created", "queued", "running", "canceled", "finished" and "error". + +Usually, you can only reliably get results from your job, +as discussed in :ref:`batch_job_results`, +when it reaches status "finished". + + + +.. index:: batch job; polling loop + +Create, start and wait in one go +---------------------------------- + +You could, depending on your situation, manually check your job's status periodically +or set up a **polling loop** system to keep an eye on your job. +The openEO Python client library also provides helpers to do that for you. + +Working from an existing :py:class:`~openeo.rest.job.BatchJob` instance + + If you have a batch job that is already created as shown above, you can use + the :py:meth:`job.start_and_wait() ` method + to start it and periodically poll its status until it reaches status "finished" (or fails with status "error"). + Along the way it will print some progress messages. + + .. code-block:: pycon + + >>> job.start_and_wait() + 0:00:00 Job 'b0e8adcf-087f-41de-afe6-b3c0ea88ff38': send 'start' + 0:00:36 Job 'b0e8adcf-087f-41de-afe6-b3c0ea88ff38': queued (progress N/A) + 0:01:35 Job 'b0e8adcf-087f-41de-afe6-b3c0ea88ff38': queued (progress N/A) + 0:02:19 Job 'b0e8adcf-087f-41de-afe6-b3c0ea88ff38': running (progress N/A) + 0:02:50 Job 'b0e8adcf-087f-41de-afe6-b3c0ea88ff38': running (progress N/A) + 0:03:28 Job 'b0e8adcf-087f-41de-afe6-b3c0ea88ff38': finished (progress N/A) + + +Working from a :py:class:`~openeo.rest.datacube.DataCube` instance + + If you didn't create the batch job yet from a given :py:class:`~openeo.rest.datacube.DataCube` + you can do the job creation, starting and waiting in one go + with :py:meth:`cube.execute_batch() `: + + .. 
code-block:: pycon + + >>> job = cube.execute_batch() + 0:00:00 Job 'f9f4e3d3-bc13-441b-b76a-b7bfd3b59669': send 'start' + 0:00:23 Job 'f9f4e3d3-bc13-441b-b76a-b7bfd3b59669': queued (progress N/A) + ... + + Note that :py:meth:`cube.execute_batch() ` + returns a :py:class:`~openeo.rest.job.BatchJob` instance pointing to + the newly created batch job. + + +.. tip:: + + You can fine-tune the details of the polling loop (the poll frequency, + how the progress is printed, ...). + See :py:meth:`job.start_and_wait() ` + or :py:meth:`cube.execute_batch() ` + for more information. + + +.. index:: batch job; logs + + +.. _batch-job-logs: + +Batch job logs +=============== + +Batch jobs in openEO have **logs** to help with *monitoring and debugging* batch jobs. +The back-end typically uses this to dump information during data processing +that may be relevant for the user (e.g. warnings, resource stats, ...). +Moreover, openEO processes like ``inspect`` allow users to log their own information. + +Batch job logs can be fetched with :py:meth:`job.logs() ` + +.. code-block:: pycon + + >>> job.logs() + [{'id': 'log001', 'level': 'info', 'message': 'Job started with 4 workers'}, + {'id': 'log002', 'level': 'debug', 'message': 'Loading 5x3x6 tiles'}, + {'id': 'log003', 'level': 'error', 'message': "Failed to load data cube: corrupt data for tile 'J9A7K2'."}, + ... + +In a Jupyter notebook environment, this also comes with Jupyter integration: + +.. image:: _static/images/batchjobs-jupyter-logs.png + + + +Automatic batch job log printing +--------------------------------- + +When using +:py:meth:`job.start_and_wait() ` +or :py:meth:`cube.execute_batch() ` +to run a batch job and it fails, +the openEO Python client library will automatically +print the batch job logs and instructions to help with further investigation: + +.. 
code-block:: pycon + + >>> job.start_and_wait() + 0:00:00 Job '68caccff-54ee-470f-abaa-559ed2d4e53c': send 'start' + 0:00:01 Job '68caccff-54ee-470f-abaa-559ed2d4e53c': running (progress N/A) + 0:00:07 Job '68caccff-54ee-470f-abaa-559ed2d4e53c': error (progress N/A) + + Your batch job '68caccff-54ee-470f-abaa-559ed2d4e53c' failed. + Logs can be inspected in an openEO (web) editor + or with `connection.job('68caccff-54ee-470f-abaa-559ed2d4e53c').logs()`. + + Printing logs: + [{'id': 'log001', 'level': 'info', 'message': 'Job started with 4 workers'}, + {'id': 'log002', 'level': 'debug', 'message': 'Loading 5x3x6 tiles'}, + {'id': 'log003', 'level': 'error', 'message': "Failed to load data cube: corrupt data for tile 'J9A7K2'."}] + + + +.. index:: batch job; results + +.. _batch_job_results: + +Download batch job results +========================== + +Once a batch job is finished you can get a handle to the results +(which can be a single file or multiple files) and metadata +with :py:meth:`~openeo.rest.job.BatchJob.get_results`: + +.. code-block:: pycon + + >>> results = job.get_results() + >>> results + + +The result metadata describes the spatio-temporal properties of the result +and is in fact a valid STAC item: + +.. code-block:: pycon + + >>> results.get_metadata() + { + 'bbox': [3.5, 51.0, 3.6, 51.1], + 'geometry': {'coordinates': [[[3.5, 51.0], [3.5, 51.1], [3.6, 51.1], [3.6, 51.0], [3.5, 51.0]]], 'type': 'Polygon'}, + 'assets': { + 'res001.tiff': { + 'href': 'https://openeo.example/download/432f3b3ef3a.tiff', + 'type': 'image/tiff; application=geotiff', + ... + 'res002.tiff': { + ... + + +Download all assets +-------------------- + +In the general case, when you have one or more result files (also called "assets"), +the easiest option to download them is +using :py:meth:`~openeo.rest.job.JobResults.download_files` (plural) +where you just specify a download folder +(otherwise the current working directory will be used by default): + +.. 
code-block:: python + + results.download_files("data/out") + +The resulting files will be named as they are advertised in the results metadata +(e.g. ``res001.tiff`` and ``res002.tiff`` in case of the metadata example above). + + +Download single asset +--------------------- + +If you know that there is just a single result file, you can also download it directly with +:py:meth:`~openeo.rest.job.JobResults.download_file` (singular) with the desired file name: + +.. code-block:: python + + results.download_file("data/out/result.tiff") + +This will fail however if there are multiple assets in the job result +(like in the metadata example above). +In that case you can still download a single asset by specifying which one you +want to download with the ``name`` argument: + +.. code-block:: python + + results.download_file("data/out/result.tiff", name="res002.tiff") + + +Fine-grained asset downloads +---------------------------- + +If you need a bit more control over which asset to download and how, +you can iterate over the result assets explicitly +and download these :py:class:`~openeo.rest.job.ResultAsset` instances +with :py:meth:`~openeo.rest.job.ResultAsset.download`, like this: + +.. code-block:: python + + for asset in results.get_assets(): + if asset.metadata["type"].startswith("image/tiff"): + asset.download("data/out/result-v2-" + asset.name) + + +Directly load batch job results +=============================== + +If you want to skip downloading an asset to disk, you can also load it directly. +For example, load a JSON asset with :py:meth:`~openeo.rest.job.ResultAsset.load_json`: + +.. 
code-block:: pycon + + >>> asset.metadata + {"type": "application/json", "href": "https://openeo.example/download/432f3b3ef3a.json"} + >>> data = asset.load_json() + >>> data + {"2021-02-24T10:59:23Z": [[3, 2, 5], [3, 4, 5]], ....} diff --git a/_sources/best_practices.rst.txt b/_sources/best_practices.rst.txt new file mode 100644 index 000000000..fd43b8f9c --- /dev/null +++ b/_sources/best_practices.rst.txt @@ -0,0 +1,93 @@ + +Best practices, coding style and general tips +=============================================== + +This is a collection of guidelines regarding best practices, +coding style and usage patterns for the openEO Python Client Library. + +It is in the first place an internal recommendation for openEO *developers* +to give documentation, code examples, demo's and tutorials +a *consistent* look and feel, +following common software engineering best practices. +Secondly, the wider audience of openEO *users* is also invited to pick up +a couple of tips and principles to improve their own code and scripts. + + +Background and inspiration +--------------------------- + +While some people consider coding style a personal choice or even irrelevant, +there are various reasons to settle on certain conventions. +Just the fact alone of following conventions +lowers the bar to get faster to the important details in someone else's code. +Apart from taste, there are also technical reasons to pick certain rules +to *streamline the programming workflow*, +not only for humans, +but also supporting tools (e.g. minimize risk on merge conflicts). + +While the Python language already has a strong focus on readability by design, +the Python community is strongly gravitating to even more strict conventions: + +- `pep8 `_: the mother of all Python code style guides +- `black `_: an opinionated code formatting tool + that gets more and more traction in popular, high profile projects. + +This openEO oriented style guide will highlight +and build on these recommendations. 
+ + +General code style recommendations +------------------------------------ + +- Indentation with 4 spaces. +- Avoid star imports (``from module import *``). + While this seems like a quick way to import a bunch of functions/classes, + it makes it very hard for the reader to figure out where things come from. + It can also lead to strange bugs and behavior because it silently overwrites + references you previously imported. + + +Line (length) management +-------------------------- + +While desktop monitors offer plenty of (horizontal) space nowadays, +it is still a common recommendation to *avoid long source code lines*. +Not only are long lines hard to read and understand, +one should also consider that source code might still be viewed +on a small screen or tight viewport, +where scrolling horizontally is annoying or even impossible. +Unnecessarily long lines are also notorious +for not playing well with version control tools and workflows. + +Here are some guidelines on how to split long statements over multiple lines. + +Split long function/method calls directly after the opening parenthesis +and list arguments with a standard 4 space indentation +(not after the first argument with some ad-hoc indentation). +Put the closing parenthesis on its own line. + +.. code-block:: python + + # Avoid this: + s2_fapar = connection.load_collection("TERRASCOPE_S2_FAPAR_V2", + spatial_extent={'west': 16.138916, 'east': 16.524124, 'south': 48.1386, 'north': 48.320647}, + temporal_extent=["2020-05-01", "2020-05-20"]) + + # This is better: + s2_fapar = connection.load_collection( + "TERRASCOPE_S2_FAPAR_V2", + spatial_extent={"west": 16.138916, "east": 16.524124, "south": 48.1386, "north": 48.320647}, + temporal_extent=["2020-05-01", "2020-05-20"], + ) + +.. TODO how to handle chained method calls + + + +Jupyter(lab) tips and tricks +------------------------------- + +- Add a cell with ``openeo.client_version()`` (e.g. 
just after importing all your libraries) + to keep track of which version of the openeo Python client library you used in your notebook. + +.. TODO how to work with "helper" modules? diff --git a/_sources/changelog.md.txt b/_sources/changelog.md.txt new file mode 100644 index 000000000..66efc0fec --- /dev/null +++ b/_sources/changelog.md.txt @@ -0,0 +1,2 @@ +```{include} ../CHANGELOG.md +``` diff --git a/_sources/configuration.rst.txt b/_sources/configuration.rst.txt new file mode 100644 index 000000000..4cb30d9e0 --- /dev/null +++ b/_sources/configuration.rst.txt @@ -0,0 +1,96 @@ + +=============== +Configuration +=============== + +.. warning:: + Configuration files are an experimental feature + and some details are subject to change. + +.. versionadded:: 0.10.0 + + +.. _configuration_files: + +Configuration files +==================== + +Some functionality of the openEO Python client library can be customized +through configuration files. + + +.. note:: + Note that these configuration files are different from the authentication secret/cache files + discussed at :ref:`auth_configuration_files`. + The latter are focussed on storing authentication secrets + and are mostly managed automatically. + The normal configuration files however should not contain secrets, + are usually edited manually, can be placed at various locations + and it is not uncommon to store them in version control where that makes sense. + + +Format +------- + +At the moment, only INI-style configs are supported. +This is a simple configuration format, easy to maintain +and it is supported out of the box in Python (without additional libraries). + +Example (note the use of sections and support for comments):: + + [General] + # Print loaded configuration file and default back-end URLs in interactive mode + verbose = auto + + [Connection] + default_backend = openeo.cloud + + +.. 
_configuration_file_locations: + +Location +--------- + +The following configuration locations are probed (in this order) for an existing configuration file. The first successful hit will be loaded: + +- the path in environment variable ``OPENEO_CLIENT_CONFIG`` if it is set (filename must end with extension ``.ini``) +- the file ``openeo-client-config.ini`` in the current working directory +- the file ``${OPENEO_CONFIG_HOME}/openeo-client-config.ini`` if the environment variable ``OPENEO_CONFIG_HOME`` is set +- the file ``${XDG_CONFIG_HOME}/openeo-python-client/openeo-client-config.ini`` if environment variable ``XDG_CONFIG_HOME`` is set +- the file ``.openeo-client-config.ini`` in the home folder of the user + + +Configuration options +---------------------- + +.. list-table:: + :widths: 10 10 40 + :header-rows: 1 + + * - Config Section + - Config + - Description and possible values + * - ``General`` + - ``verbose`` + - Verbosity mode when important config values are used: + + ``print``: always ``print()`` info + + ``auto`` (default): only ``print()`` when in an interactive context + + ``off``: don't print info + * - ``Connection`` + - ``default_backend`` + - Default back-end to connect to when :py:func:`openeo.connect()` + is used without explicit back-end URL. + Also see :ref:`default_url_and_auto_auth` + * - ``Connection`` + - ``default_backend.auto_authenticate`` + - Automatically authenticate in :py:func:`openeo.connect()` when using the ``default_backend`` config. Allowed values: + + ``basic`` for basic authentication + + ``oidc`` for OpenID Connect authentication + + ``off`` (default) for no authentication + + Also see :ref:`default_url_and_auto_auth` + * - ``Connection`` + - ``auto_authenticate`` + - Automatically authenticate in :py:func:`openeo.connect()`. + Allowed values: see ``default_backend.auto_authenticate``. 
+ Also see :ref:`default_url_and_auto_auth` diff --git a/_sources/cookbook/ard.rst.txt b/_sources/cookbook/ard.rst.txt new file mode 100644 index 000000000..908e2bb83 --- /dev/null +++ b/_sources/cookbook/ard.rst.txt @@ -0,0 +1,113 @@ +.. _ard: + +============================== +Analysis Ready Data generation +============================== + +For certain use cases, the preprocessed data collections available in the openEO back-ends are not sufficient or simply not +available. For that case, openEO supports a few very common preprocessing scenarios: + +- Atmospheric correction of optical data +- SAR backscatter computation + +These processes also offer a number of parameters to customize the processing. There are also variants with a default +parametrization that results in data that is compliant with CEOS CARD4L specifications https://ceos.org/ard/. + +We should note that these operations can be computationally expensive, so they certainly affect overall processing time and +cost of your final algorithm. Hence, make sure to make an informed decision when you decide to use these methods. + +Atmospheric correction +---------------------- + +The `atmospheric correction `_ process can apply a chosen +method on raw 'L1C' data. The supported methods and input datasets depend on the back-end, because not every method is +validated or works on any dataset, and different back-ends try to offer a variety of options. This gives you as a user +more options to run and compare different methods, and select the most suitable one for your case. + + +To perform an `atmospheric correction `_, the user has to +load an uncorrected L1C optical dataset. On the resulting datacube, the :func:`~openeo.rest.datacube.DataCube.atmospheric_correction` +method can be invoked. 
Note that it may not be possible to apply certain processes to the raw input data: preprocessing +algorithms can be tightly coupled with the raw data, making it hard or impossible for the back-end to perform operations +in between loading and correcting the data. + +The CARD4L variant of this process is: :func:`~openeo.rest.datacube.DataCube.ard_surface_reflectance`. This process follows +CEOS specifications, and thus can include additional processing steps, like a BRDF correction, that are not yet available as a +separate process. + +Reference implementations +######################### + +This section shows a few working examples for these processes. + +EODC back-end +************* + +EODC (https://openeo.eodc.eu/v1.0) supports ard_surface_reflectance, based on the FORCE toolbox. (https://github.com/davidfrantz/force) + +Geotrellis back-end +******************* + +The geotrellis back-end (https://openeo.vito.be) supports :func:`~openeo.rest.datacube.DataCube.atmospheric_correction` with iCor and SMAC as methods. +The version of iCor only offers basic atmospheric correction features, without special options for water products: https://remotesensing.vito.be/case/icor +SMAC is implemented based on: https://github.com/olivierhagolle/SMAC +Both methods have been tested with Sentinel-2 as input. The viewing and sun angles need to be selected by the user to make them +available for the algorithm. 
+ +This is an example of applying iCor:: + + l1c = connection.load_collection("SENTINEL2_L1C_SENTINELHUB", + spatial_extent={'west':3.758216409030558,'east':4.087806252,'south':51.291835566,'north':51.3927399}, + temporal_extent=["2017-03-07","2017-03-07"],bands=['B04','B03','B02','B09','B8A','B11','sunAzimuthAngles','sunZenithAngles','viewAzimuthMean','viewZenithMean'] ) + l1c.atmospheric_correction(method="iCor").download("rgb-icor.geotiff",format="GTiff") + + +SAR backscatter +--------------- + +Data from synthetic aperture radar sensors requires significant preprocessing to be calibrated and normalized for terrain. +This is referred to as backscatter computation, and supported by +`sar_backscatter `_ and the CARD4L compliant variant +`ard_normalized_radar_backscatter `_ + +The user should load a datacube containing raw SAR data, such as Sentinel-1 GRD. On the resulting datacube, the +:func:`~openeo.rest.datacube.DataCube.sar_backscatter` method can be invoked. The CEOS CARD4L variant is: +:func:`~openeo.rest.datacube.DataCube.ard_normalized_radar_backscatter`. These processes are tightly coupled to +metadata from specific sensors, so it is not possible to apply other processes to the datacube first, +with the exception of specifying filters in space and time. + + +Reference implementations +######################### + +This section shows a few working examples for these processes. + +EODC back-end +************* + +EODC (https://openeo.eodc.eu/v1.0) supports sar_backscatter, based on the Sentinel-1 toolbox. (https://sentinel.esa.int/web/sentinel/toolboxes/sentinel-1) + +Geotrellis back-end +******************* + +When working with the Sentinelhub SENTINEL1_GRD collection, both sar processes can be used. The underlying implementation is +provided by Sentinelhub, (https://docs.sentinel-hub.com/api/latest/data/sentinel-1-grd/#processing-options), and offers full +CARD4L compliant processing options. 
+ +This is an example of :func:`~openeo.rest.datacube.DataCube.ard_normalized_radar_backscatter`:: + + s1grd = (connection.load_collection('SENTINEL1_GRD', bands=['VH', 'VV']) + .filter_bbox(west=2.59003, east=2.8949, north=51.2206, south=51.069) + .filter_temporal(extent=["2019-10-10","2019-10-10"])) + + job = s1grd.ard_normalized_radar_backscatter().execute_batch() + + for asset in job.get_results().get_assets(): + asset.download() + +When working with other GRD data, an implementation based on Orfeo Toolbox is used: + +- `Orfeo docs `_ +- `Implementation `_ + +The Orfeo implementation currently only supports sigma0 computation, and is not CARD4L compliant. diff --git a/_sources/cookbook/index.rst.txt b/_sources/cookbook/index.rst.txt new file mode 100644 index 000000000..719d2049b --- /dev/null +++ b/_sources/cookbook/index.rst.txt @@ -0,0 +1,14 @@ +openEO CookBook +=============== + +.. toctree:: + :maxdepth: 3 + :caption: Contents: + + ard + sampling + udp_sharing + spectral_indices + job_manager + localprocessing + tricks diff --git a/_sources/cookbook/job_manager.rst.txt b/_sources/cookbook/job_manager.rst.txt new file mode 100644 index 000000000..b5219dc72 --- /dev/null +++ b/_sources/cookbook/job_manager.rst.txt @@ -0,0 +1,122 @@ +==================================== +Multi Backend Job Manager +==================================== + +API +=== + +.. warning:: + This is a new experimental API, subject to change. + +.. autoclass:: openeo.extra.job_management.MultiBackendJobManager + :members: + +.. autoclass:: openeo.extra.job_management.JobDatabaseInterface + :members: + +.. autoclass:: openeo.extra.job_management.CsvJobDatabase + +.. autoclass:: openeo.extra.job_management.ParquetJobDatabase + + +.. autoclass:: openeo.extra.job_management.ProcessBasedJobCreator + :members: + :special-members: __call__ + + +.. 
_job-management-with-process-based-job-creator: + +Job creation based on parameterized processes +=============================================== + +The openEO API supports parameterized processes out of the box, +which allows to work with flexible, reusable openEO building blocks +in the form of :ref:`user-defined processes ` +or `remote openEO process definitions `_. +This can also be leveraged for job creation in the context of the +:py:class:`~openeo.extra.job_management.MultiBackendJobManager`: +define a "template" job as a parameterized process +and let the job manager fill in the parameters +from a given data frame. + +The :py:class:`~openeo.extra.job_management.ProcessBasedJobCreator` helper class +allows to do exactly that. +Given a reference to a parameterized process, +such as a user-defined process or remote process definition, +it can be used directly as ``start_job`` callable to +:py:meth:`~openeo.extra.job_management.MultiBackendJobManager.run_jobs` +which will fill in the process parameters from the dataframe. + +Basic :py:class:`~openeo.extra.job_management.ProcessBasedJobCreator` example +----------------------------------------------------------------------------- + +Basic usage example with a remote process definition: + +.. code-block:: python + :linenos: + :caption: Basic :py:class:`~openeo.extra.job_management.ProcessBasedJobCreator` example snippet + :emphasize-lines: 10-15, 28 + + from openeo.extra.job_management import ( + MultiBackendJobManager, + create_job_db, + ProcessBasedJobCreator, + ) + + # Job creator, based on a parameterized openEO process + # (specified by the remote process definition at given URL) + # which has parameters "start_date" and "bands" for example. + job_starter = ProcessBasedJobCreator( + namespace="https://example.com/my_process.json", + parameter_defaults={ + "bands": ["B02", "B03"], + }, + ) + + # Initialize job database from a dataframe, + # with desired parameter values to fill in. 
+ df = pd.DataFrame({ + "start_date": ["2021-01-01", "2021-02-01", "2021-03-01"], + }) + job_db = create_job_db("jobs.csv").initialize_from_df(df) + + # Create and run job manager, + # which will start a job for each of the `start_date` values in the dataframe + # and use the default band list ["B02", "B03"] for the "bands" parameter. + job_manager = MultiBackendJobManager(...) + job_manager.run_jobs(job_db=job_db, start_job=job_starter) + +In this example, a :py:class:`ProcessBasedJobCreator` is instantiated +based on a remote process definition, +which has parameters ``start_date`` and ``bands``. +When passed to :py:meth:`~openeo.extra.job_management.MultiBackendJobManager.run_jobs`, +a job for each row in the dataframe will be created, +with parameter values based on matching columns in the dataframe: + +- the ``start_date`` parameter will be filled in + with the values from the "start_date" column of the dataframe, +- the ``bands`` parameter has no corresponding column in the dataframe, + and will get its value from the default specified in the ``parameter_defaults`` argument. + + +:py:class:`~openeo.extra.job_management.ProcessBasedJobCreator` with geometry handling +--------------------------------------------------------------------------------------------- + +Apart from the intuitive name-based parameter-column linking, +:py:class:`~openeo.extra.job_management.ProcessBasedJobCreator` +also automatically links: + +- a process parameter that accepts inline GeoJSON geometries/features + (which practically means it has a schema like ``{"type": "object", "subtype": "geojson"}``, + as produced by :py:meth:`Parameter.geojson `). +- with the geometry column in a `GeoPandas `_ dataframe. + +even if the name of the parameter does not exactly match +the name of the GeoPandas geometry column (``geometry`` by default). +This automatic linking is only done if there is only one +GeoJSON parameter and one geometry column in the dataframe. + + +.. 
admonition:: to do + + Add example with geometry handling. diff --git a/_sources/cookbook/localprocessing.rst.txt b/_sources/cookbook/localprocessing.rst.txt new file mode 100644 index 000000000..ece58ebd7 --- /dev/null +++ b/_sources/cookbook/localprocessing.rst.txt @@ -0,0 +1,184 @@ +=============================== +Client-side (local) processing +=============================== + +.. warning:: + This is a new experimental feature and API, subject to change. + +Background +---------- + +The client-side processing functionality allows to test and use openEO with its processes locally, i.e. without any connection to an openEO back-end. +It relies on the projects `openeo-pg-parser-networkx `_, which provides an openEO process graph parsing tool, and `openeo-processes-dask `_, which provides an Xarray and Dask implementation of most openEO processes. + +Installation +------------ + +.. note:: + This feature requires ``Python>=3.9``. + Tested with ``openeo-pg-parser-networkx==2023.5.1`` and + ``openeo-processes-dask==2023.7.1``. + +.. code:: bash + + pip install openeo[localprocessing] + +Usage +----- + +Every openEO process graph relies on data which is typically provided by a cloud infrastructure (the openEO back-end). +The client-side processing adds the possibility to read and use local netCDFs, geoTIFFs, ZARR files, and remote STAC Collections or Items for your experiments. + +STAC Collections and Items +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + The provided examples using STAC rely on third party STAC Catalogs, we can't guarantee that the urls will remain valid. + +With the ``load_stac`` process it's possible to load and use data provided by remote or local STAC Collections or Items. +The following code snippet loads Sentinel-2 L2A data from a public STAC Catalog, using specific spatial and temporal extent, band name and also properties for cloud coverage. + +.. 
code-block:: pycon + + >>> from openeo.local import LocalConnection + >>> local_conn = LocalConnection("./") + + >>> url = "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a" + >>> spatial_extent = {"west": 11, "east": 12, "south": 46, "north": 47} + >>> temporal_extent = ["2019-01-01", "2019-06-15"] + >>> bands = ["red"] + >>> properties = {"eo:cloud_cover": dict(lt=50)} + >>> s2_cube = local_conn.load_stac(url=url, + ... spatial_extent=spatial_extent, + ... temporal_extent=temporal_extent, + ... bands=bands, + ... properties=properties, + ... ) + >>> s2_cube.execute() + + dask.array + Coordinates: (12/53) + * time (time) datetime64[ns] 2019-01-02... + id (time) `_. + If the code can not handle you special netCDF, + you can still modify the function that reads the metadata from it (`openeo/local/collections.py#L19 `_) + and the function that reads the data (`openeo/local/processing.py#L26 `_). + +Local Processing +~~~~~~~~~~~~~~~~ + +Let's start with the provided sample netCDF of Sentinel-2 data: + +.. code-block:: pycon + + >>> local_collection = "openeo-localprocessing-data/sample_netcdf/S2_L2A_sample.nc" + >>> s2_datacube = local_conn.load_collection(local_collection) + >>> # Check if the data is loaded correctly + >>> s2_datacube.execute() + + dask.array + Coordinates: + * t (t) datetime64[ns] 2022-06-02 2022-06-05 ... 2022-06-27 2022-06-30 + * x (x) float64 6.75e+05 6.75e+05 6.75e+05 ... 6.843e+05 6.843e+05 + * y (y) float64 5.155e+06 5.155e+06 5.155e+06 ... 5.148e+06 5.148e+06 + crs |S1 ... + * bands (bands) object 'B04' 'B03' 'B02' 'B08' 'SCL' + Attributes: + Conventions: CF-1.9 + institution: openEO platform - Geotrellis backend: 0.9.5a1 + description: + title: + +As you can see in the previous example, we are using a call to execute() which will execute locally the generated openEO process graph. +In this case, the process graph consist only in a single load_collection, which performs lazy loading of the data. 
With this first step you can check if the data is being read correctly by openEO. + +Looking at the metadata of this netCDF sample, we can see that it contains the bands B04, B03, B02, B08 and SCL. +Additionally, we also see that it is composed by more than one element in time and that it covers the month of June 2022. + +We can now do a simple processing for demo purposes, let's compute the median NDVI in time and visualize the result: + +.. code:: python + + b04 = s2_datacube.band("B04") + b08 = s2_datacube.band("B08") + ndvi = (b08 - b04) / (b08 + b04) + ndvi_median = ndvi.reduce_dimension(dimension="t", reducer="median") + result_ndvi = ndvi_median.execute() + result_ndvi.plot.imshow(cmap="Greens") + +.. image:: ../_static/images/local/local_ndvi.jpg + +We can perform the same example using data provided by STAC Collection: + +.. code:: python + + from openeo.local import LocalConnection + local_conn = LocalConnection("./") + + url = "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a" + spatial_extent = {"east": 11.40, "north": 46.52, "south": 46.46, "west": 11.25} + temporal_extent = ["2022-06-01", "2022-06-30"] + bands = ["red", "nir"] + properties = {"eo:cloud_cover": dict(lt=80)} + s2_datacube = local_conn.load_stac( + url=url, + spatial_extent=spatial_extent, + temporal_extent=temporal_extent, + bands=bands, + properties=properties, + ) + + b04 = s2_datacube.band("red") + b08 = s2_datacube.band("nir") + ndvi = (b08 - b04) / (b08 + b04) + ndvi_median = ndvi.reduce_dimension(dimension="time", reducer="median") + result_ndvi = ndvi_median.execute() diff --git a/_sources/cookbook/sampling.md.txt b/_sources/cookbook/sampling.md.txt new file mode 100644 index 000000000..ce06c1e6a --- /dev/null +++ b/_sources/cookbook/sampling.md.txt @@ -0,0 +1,61 @@ + +# Dataset sampling + + +A number of use cases do not require a full datacube to be computed, +but rather want to extract a result at specific locations. 
+Examples include extracting training data for model calibration, or computing the result for +areas where validation data is available. + +An important constraint is that most implementations assume that sampling is an operation +on relatively small areas, of for instance up to 512x512 pixels (but often much smaller). +When extracting larger areas, it is recommended to look into running a separate job per 'sample'. + +Sampling can be done for points or polygons: + +- point extractions basically result in a 'vector cube', so can be exported into tabular formats. +- polygon extractions can be stored to an individual netCDF per polygon so in this case the output is a sparse raster cube. + +To indicate to openEO that we only want to compute the datacube for certain polygon features, we use the +`openeo.rest.datacube.DataCube.filter_spatial` method. + +Next to that, we will also indicate that we want to write multiple output files. This is more convenient, as we will +want to have one or more raster outputs per sampling feature, for convenient further processing. To do this, we set +the 'sample_by_feature' output format property, which is available for the netCDF and GTiff output formats. + +Combining all of this, results in the following sample code: + +```python +s2_bands = auth_connection.load_collection( + "SENTINEL2_L2A", + bands=["B04"], + temporal_extent=["2020-05-01", "2020-06-01"], +) +s2_bands = s2_bands.filter_spatial( + "https://artifactory.vgt.vito.be/testdata-public/parcels/test_10.geojson", +) +job = s2_bands.create_job( + title="Sentinel2", + description="Sentinel-2 L2A bands", + out_format="netCDF", + sample_by_feature=True, +) +``` + + +Sampling only works for batch jobs, because it results in multiple output files, which can not be conveniently transferred +in a synchronous call. 
+ +## Performance & scalability + +It's important to note that dataset sampling is not necessarily a cheap operation, since creation of a sparse datacube still +may require accessing a large number of raw EO assets. Backends of course can and should optimize to restrict processing +to a minimum, but the size of the required input datasets is often a determining factor for cost and performance rather +than the size of the output dataset. + +## Sampling at scale + +When doing large scale (e.g. continental) sampling, it is usually impossible or impractical to run it as a single openEO +batch job. The recommendation here is to apply a spatial grouping to your sampling locations, with a single group covering +an area of around 100x100km. The optimal size of a group may be backend dependent. Also remember that when working with +data in the UTM projection, you may want to avoid covering multiple UTM zones in a single group. diff --git a/_sources/cookbook/spectral_indices.rst.txt b/_sources/cookbook/spectral_indices.rst.txt new file mode 100644 index 000000000..21ebe849d --- /dev/null +++ b/_sources/cookbook/spectral_indices.rst.txt @@ -0,0 +1,88 @@ +==================================== +Spectral Indices +==================================== + +.. warning:: + This is a new experimental API, subject to change. + +``openeo.extra.spectral_indices`` is an auxiliary subpackage +to simplify the calculation of common spectral indices +used in various Earth observation applications (vegetation, water, urban etc.). +It leverages the spectral indices defined in the +`Awesome Spectral Indices `_ project +by `David Montero Loaiza `_. + +.. versionadded:: 0.9.1 + +Band mapping +============= + +The formulas provided by "Awesome Spectral Indices" are defined in terms of standardized variable names +like "B" for blue, "R" for red, "N" for near-infrared, "WV" for water vapour, etc. + +.. 
code-block:: json + + "NDVI": { + "formula": "(N - R)/(N + R)", + "long_name": "Normalized Difference Vegetation Index", + +Obviously, these formula variables have to be mapped properly to the band names of your cube. + +Automatic band mapping +----------------------- +In most simple cases, when there is enough collection metadata +to automatically detect the satellite platform (Sentinel2, Landsat8, ..) +and the original band names haven't been renamed, +this mapping will be handled automatically, e.g.: + +.. code-block:: python + :emphasize-lines: 2 + + cube = connection.load_collection("SENTINEL2_L2A", ...) + indices = compute_indices(cube, indices=["NDVI", "NDMI"]) + + + +.. _spectral_indices_manual_band_mapping: + +Manual band mapping +-------------------- + +In more complex cases, it might be necessary to specify some additional information to guide the band mapping. +If the band names follow the standard, but it's just the satellite platform can not be guessed +from the collection metadata, it is typically enough to specify the platform explicitly: + +.. code-block:: python + :emphasize-lines: 4 + + indices = compute_indices( + cube, + indices=["NDVI", "NDMI"], + platform="SENTINEL2", + ) + +Additionally, if the band names in your cube have been renamed, deviating from conventions, it is also +possible to explicitly specify the band name to spectral index variable name mapping: + +.. code-block:: python + :emphasize-lines: 4-8 + + indices = compute_indices( + cube, + indices=["NDVI", "NDMI"], + variable_map={ + "R": "S2-red", + "N": "S2-nir", + "S1": "S2-swir", + }, + ) + +.. versionadded:: 0.26.0 + Function arguments ``platform`` and ``variable_map`` to fine-tune the band mapping. + + +API +==== + +.. 
automodule:: openeo.extra.spectral_indices + :members: list_indices, compute_and_rescale_indices, append_and_rescale_indices, compute_indices, append_indices, compute_index, append_index diff --git a/_sources/cookbook/tricks.rst.txt b/_sources/cookbook/tricks.rst.txt new file mode 100644 index 000000000..c1eed20a5 --- /dev/null +++ b/_sources/cookbook/tricks.rst.txt @@ -0,0 +1,130 @@ +=============================== +Miscellaneous tips and tricks +=============================== + + +.. _process_graph_export: + +Export a process graph +----------------------- + +You can export the underlying process graph of +a :py:class:`~openeo.rest.datacube.DataCube`, :py:class:`~openeo.rest.vectorcube.VectorCube`, etc, +to a standardized JSON format, which allows interoperability with other openEO tools. + +For example, use :py:meth:`~openeo.rest.datacube.DataCube.print_json()` to directly print the JSON representation +in your interactive Jupyter or Python session: + +.. code-block:: pycon + + >>> dump = cube.print_json() + { + "process_graph": { + "loadcollection1": { + "process_id": "load_collection", + ... + +Or save it to a file, by getting the JSON representation first as a string +with :py:meth:`~openeo.rest.datacube.DataCube.to_json()`: + +.. code-block:: python + + # Export as JSON string + dump = cube.to_json() + + # Write to file in `pathlib` style + export_path = pathlib.Path("path/to/export.json") + export_path.write_text(dump, encoding="utf8") + + # Write to file in `open()` style + with open("path/to/export.json", "w", encoding="utf8") as f: + f.write(dump) + + +.. warning:: + + Avoid using methods like :py:meth:`~openeo.rest.datacube.DataCube.flat_graph()`, + which are mainly intended for internal use. + Not only are these methods subject to change, they also lead to representations + with interoperability and reuse issues. 
+ For example, naively printing or automatic (``repr``) rendering of + :py:meth:`~openeo.rest.datacube.DataCube.flat_graph()` output will roughly look like JSON, + but is in fact invalid: it uses single quotes (instead of double quotes) + and boolean values are title-case (instead of lower case). + + + + +Execute a process graph directly from raw JSON +----------------------------------------------- + +When you have a process graph in JSON format, as a string, a local file or a URL, +you can execute/download it without converting it to a DataCube first. +Just pass the string, path or URL directly to +:py:meth:`Connection.download() `, +:py:meth:`Connection.execute() ` or +:py:meth:`Connection.create_job() `. +For example: + +.. code-block:: python + + # `execute` with raw JSON string + connection.execute(""" + { + "add": {"process_id": "add", "arguments": {"x": 3, "y": 5}, "result": true} + } + """) + + # `download` with local path to JSON file + connection.download("path/to/my-process-graph.json") + + # `create_job` with URL to JSON file + job = connection.create_job("https://jsonbin.example/my/process-graph.json") + + +.. _legacy_read_vector: + + +Legacy ``read_vector`` usage +---------------------------- + +In versions up to 0.35.0 of the openEO Python client library, +there was an old, deprecated feature in geometry handling +of :py:class:`~openeo.rest.datacube.DataCube` methods like +:py:meth:`~openeo.rest.datacube.DataCube.aggregate_spatial()` and +:py:meth:`~openeo.rest.datacube.DataCube.mask_polygon()` +where you could pass a *backend-side* path as ``geometries``, e.g.: + +.. code-block:: python + + cube = cube.aggregate_spatial( + geometries="/backend/path/to/geometries.json", + reducer="mean" + ) + +The client would handle this by automatically adding a ``read_vector`` process +in the process graph, with that path as argument, to instruct the backend to load the geometries from there. 
+This ``read_vector`` process was however a backend-specific, experimental and now deprecated process. +Moreover, it assumes that the user has access to (or at least knowledge of) the backend's file system, +which violates the openEO principle of abstracting away backend-specific details. + +In version 0.36.0, this old deprecated ``read_vector`` feature has been *removed*, +to allow other and better convenience functionality +when providing a string in the ``geometries`` argument: +e.g. load from a URL with standard process ``load_url``, +or load GeoJSON from a local clientside path. + +If your workflow however depends on the old, deprecated ``read_vector`` functionality, +it is possible to reconstruct that by manually adding a ``read_vector`` process in your workflow, +for example as follows: + +.. code-block:: python + + from openeo.processes import process + + cube = cube.aggregate_spatial( + geometries=process("read_vector", filename="/backend/path/to/geometries.json"), + reducer="mean" + ) + +Note that this also works with older versions of the openEO Python client library. diff --git a/_sources/cookbook/udp_sharing.rst.txt b/_sources/cookbook/udp_sharing.rst.txt new file mode 100644 index 000000000..cbc18d1e4 --- /dev/null +++ b/_sources/cookbook/udp_sharing.rst.txt @@ -0,0 +1,133 @@ +==================================== +Sharing of user-defined processes +==================================== + + +.. warning:: + Beta feature - + At the time of this writing (July 2021), sharing of :ref:`user-defined processes ` + (publicly or among users) is not standardized in the openEO API. + There are however some experimental sharing features in the openEO Python Client Library + and some back-end providers that we are going to discuss here. + + Be warned that the details of this feature are subject to change. + For more status information, consult GitHub ticket + `Open-EO/openeo-api#310 `_. + + + + +Publicly publishing a user-defined process. 
+============================================ + +As discussed in :ref:`build_and_store_udp`, user-defined processes can be +stored with the :py:meth:`~openeo.rest.connection.Connection.save_user_defined_process` method +on a back-end :py:class:`~openeo.rest.connection.Connection`. +By default, these user-defined processes are private and only accessible by the user that saved them:: + + from openeo.processes import subtract, divide + from openeo.api.process import Parameter + + # Build user-defined process + f = Parameter.number("f", description="Degrees Fahrenheit.") + fahrenheit_to_celsius = divide(x=subtract(x=f, y=32), y=1.8) + + # Store user-defined process in openEO back-end. + udp = connection.save_user_defined_process( + "fahrenheit_to_celsius", + fahrenheit_to_celsius, + parameters=[f] + ) + + +Some back-ends, like the VITO/Terrascope back-end allow a user to flag a user-defined process as "public" +so that other users can access its description and metadata:: + + udp = connection.save_user_defined_process( + ... + public=True + ) + +The sharable, public URL of this user-defined process is available from the metadata given by +:py:meth:`RESTUserDefinedProcess.describe `. +It's listed as "canonical" link:: + + >>> udp.describe() + { + "id": "fahrenheit_to_celsius", + "links": [ + { + "rel": "canonical", + "href": "https://openeo.vito.be/openeo/1.0/processes/u:johndoe/fahrenheit_to_celsius", + "title": "Public URL for user-defined process fahrenheit_to_celsius" + } + ], + ... + + +.. _udp_sharing_call_url_namespace: + +Using a public UDP through URL based "namespace" +================================================== + +Some back-ends, like the VITO/Terrascope back-end, allow to use a public UDP +through setting its public URL as the ``namespace`` property of the process graph node. + +For example, based on the ``fahrenheit_to_celsius`` UDP created above, +the "flat graph" representation of a process graph could look like this:: + + { + ... 
+ "to_celsius": { + "process_id": "fahrenheit_to_celsius", + "namespace": "https://openeo.vito.be/openeo/1.0/processes/u:johndoe/fahrenheit_to_celsius", + "arguments": {"f": 86} + } + + +As a very basic illustration with the openEO Python Client library, +we can create and evaluate a process graph, +containing a ``fahrenheit_to_celsius`` call as single process, +with :meth:`Connection.datacube_from_process ` as follows:: + + cube = connection.datacube_from_process( + process_id="fahrenheit_to_celsius", + namespace="https://openeo.vito.be/openeo/1.0/processes/u:johndoe/fahrenheit_to_celsius", + f=86 + ) + print(cube.execute()) + # Prints: 30.0 + + +Loading a published user-defined process as DataCube +====================================================== + + +From the public URL of the user-defined process, +it is also possible for another user to construct, fully client-side, +a new :py:class:`~openeo.rest.datacube.DataCube` +with :py:meth:`Connection.datacube_from_json() `. + +It is important to note that this approach is different from calling +a user-defined process as described in :ref:`evaluate_udp` and :ref:`udp_sharing_call_url_namespace`. +:py:meth:`Connection.datacube_from_json() ` +breaks open the encapsulation of the user-defined process and "unrolls" the process graph inside +into a new :py:class:`~openeo.rest.datacube.DataCube`. +This also implies that parameters defined in the user-defined process have to be provided when calling +:py:meth:`Connection.datacube_from_json() `: + + +.. code-block:: python + :emphasize-lines: 4 + + udp_url = "https://openeo.vito.be/openeo/1.0/processes/u:johndoe/fahrenheit_to_celsius" + cube = connection.datacube_from_json( + udp_url, + parameters={"f": 86}, + ) + print(cube.execute()) + # Prints: 30.0 + +Note that :py:meth:`Connection.datacube_from_json() ` +not only supports loading UDPs from an URL but also from a raw JSON string or a local file path. +For more information, also see :ref:`datacube_from_json`. 
diff --git a/_sources/data_access.rst.txt b/_sources/data_access.rst.txt new file mode 100644 index 000000000..cdc0d0d81 --- /dev/null +++ b/_sources/data_access.rst.txt @@ -0,0 +1,345 @@ +.. _data_access_chapter: + +######################## +Finding and loading data +######################## + + +As illustrated in the basic concepts, most openEO scripts start with ``load_collection``, but this skips the step of +actually finding out which collection to load. This section dives a bit deeper into finding the right data, and some more +advanced data loading use cases. + +Data discovery +============== + +To explore data in a given back-end, it is recommended to use a more visual tool like the openEO Hub +(http://hub.openeo.org/). This shows available collections, and metadata in a user-friendly manner. + +Next to that, the client also offers various :py:class:`~openeo.rest.connection.Connection` methods +to explore collections and their metadata: + +- :py:meth:`~openeo.rest.connection.Connection.list_collection_ids` + to list all collection ids provided by the back-end +- :py:meth:`~openeo.rest.connection.Connection.list_collections` + to list the basic metadata of all collections +- :py:meth:`~openeo.rest.connection.Connection.describe_collection` + to get the complete metadata of a particular collection + +When using these methods inside a Jupyter notebook, you should notice that the output is rendered in a user friendly way. + +In a regular script, these methods can be used to programmatically find a collection that matches specific criteria. + +As a user, make sure to carefully read the documentation for a given collection, as there can be important differences. +You should also be aware of the data retention policy of a given collection: some data archives only retain the last 3 months +for instance, making them only suitable for specific types of analysis. Such differences can have an impact on the reproducibility +of your openEO scripts. 
+ +Also note that the openEO metadata may use links to point to much more information for a particular collection. For instance +technical specification on how the data was preprocessed, or viewers that allow you to visually explore the data. This can +drastically improve your understanding of the dataset. + +Finally, licensing information is important to keep an eye on: not all data is free and open. + + +Initial exploration of an openEO collection +------------------------------------------- + +A common question from users is about very specific details of a collection, we'd like to list some examples and solutions here: + +- The collection data type, and range of values, can be determined by simply downloading a sample of data, as NetCDF or Geotiff. This can in fact be done at any point in the design of your script, to get a good idea of intermediate results. +- Data availability, and available timestamps can be retrieved by computing average values for your area of interest. Just construct a polygon, and retrieve those statistics. For optical data, this can also be used to get an idea on cloud statistics. +- Most collections have a native projection system, again a simple download will give you this information if its not clear from the metadata. + +.. _data-loading-and-filtering: + +Loading a data cube from a collection +===================================== + +Many examples already illustrate the basic openEO ``load_collection`` process through a :py:meth:`Connection.load_collection() ` call, +with filters on space, time and bands. +For example: + +.. code-block:: python + + cube = connection.load_collection( + "SENTINEL2_L2A", + spatial_extent={"west": 3.75, "east": 4.08, "south": 51.29, "north": 51.39}, + temporal_extent=["2021-05-07", "2021-05-14"], + bands=["B04", "B03", "B02"], + ) + + +The purpose of these filters in ``load_collection`` is to reduce the amount of raw data that is loaded (and processed) by the back-end. 
+This is essential to get a response to your processing request in reasonable time and keep processing costs low. +It's recommended to start initial exploration with a small spatio-temporal extent +and gradually increase the scope once initial tests work out. + +Next to specifying filters inside the ``load_collection`` process, +there are also possibilities to filter with separate filter processes, e.g. at a later stage in your process graph. +For most openEO back-ends, the following example snippet should be equivalent to the previous: + +.. code-block:: python + + cube = connection.load_collection("SENTINEL2_L2A") + cube = cube.filter_bbox(west=3.75, east=4.08, south=51.29, north=51.39) + cube = cube.filter_temporal("2021-05-07", "2021-05-14") + cube = cube.filter_bands(["B04", "B03", "B02"]) + + +Another nice feature is that processes that work with geometries or vector features +(e.g. aggregated statistics for a polygon, or masking by polygon) +can also be used by a back-end to automatically infer an appropriate spatial extent. +This way, you do not need to explicitly set these filters yourself. + +In the following sections, we want to dive a bit into details, and more advanced cases. + + +Filter on spatial extent +======================== + +A spatial extent is a bounding box that specifies the minimum and maximum longitude and latitude of the region of interest you want to process. + +By default these latitude and longitude values are expressed in the standard Coordinate Reference System for the world, +which is EPSG:4326, also known as "WGS 84", or just "lat-long". + +.. code-block:: python + + connection.load_collection( + ..., + spatial_extent={"west": 5.14, "south": 51.17, "east": 5.17, "north": 51.19}, + ) + +.. 
_filtering-on-temporal-extent-section: + +Filter on temporal extent +========================= + +Usually you don't need the complete time range provided by a collection +and you should specify an appropriate time window to load +as a ``temporal_extent`` pair containing a start and end date: + +.. code-block:: python + + connection.load_collection( + ..., + temporal_extent=["2021-05-07", "2021-05-14"], + ) + +In most use cases, day-level granularity is enough and you can just express the dates as strings in the format ``"yyyy-mm-dd"``. +You can also pass ``datetime.date`` objects (from Python standard library) if you already have your dates in that format. + +.. note:: + When you need finer, time-level granularity, you can pass ``datetime.datetime`` objects. + Or, when passed as a string, the openEO API requires date and time to be provided in RFC 3339 format. + For example for 2020-03-17 at 12:34:56 in UTC:: + + "2020-03-17T12:34:56Z" + + + +.. _left-closed-temporal-extent: + +Left-closed intervals: start included, end excluded +--------------------------------------------------- + +Time ranges in openEO processes like ``load_collection`` and ``filter_temporal`` are handled as left-closed ("half-open") temporal intervals: +the start instant is included in the interval, but the end instant is excluded from the interval. + +For example, the interval defined by ``["2020-03-05", "2020-03-15"]`` covers observations +from 2020-03-05 up to (and including) 2020-03-14 (just before midnight), +but does not include observations from 2020-03-15. + +.. TODO: nicer diagram instead of this ASCII art +.. 
code-block:: text + + 2020-03-05 2020-03-14 2020-03-15 + ________|____________|_________________________|____________|____________|_____ + + [--------------------------------------------------(O + including excluding + 2020-03-05 00:00:00.000 2020-03-15 00:00:00.000 + + +While this might look unintuitive at first, +working with half-open intervals avoids common and hard to discover pitfalls when combining multiple intervals, +like unintended window overlaps or double counting observations at interval borders. + +.. _date-shorthand-handling: + +Year/month shorthand notation +------------------------------ + +.. note:: + + Year/month shorthand notation handling is available since version 0.23.0. + +Rounding down periods to dates +`````````````````````````````` + +The openEO Python Client Library supports some shorthand notations for the temporal extent, +which come in handy if you work with year/month based temporal intervals. +Date strings that only consist of a year or a month will be automatically +"rounded down" to the first day of that period. For example:: + + "2023" -> "2023-01-01" + "2023-08" -> "2023-08-01" + +This approach fits best with :ref:`left-closed interval handling `. + +For example, the following two ``load_collection`` calls are equivalent: + +.. code-block:: python + + # Filter for observations in 2021 (left-closed interval). + connection.load_collection(temporal_extent=["2021", "2022"], ...) + # The above is shorthand for: + connection.load_collection(temporal_extent=["2021-01-01", "2022-01-01"], ...) + +The same applies for :py:meth:`~openeo.rest.datacube.DataCube.filter_temporal()`, +which has a couple of additional call forms. +All these calls are equivalent: + +.. 
code-block:: python + + # Filter for March, April and May (left-closed interval) + cube = cube.filter_temporal("2021-03", "2021-06") + cube = cube.filter_temporal(["2021-03", "2021-06"]) + cube = cube.filter_temporal(start_date="2021-03", end_date="2021-06") + cube = cube.filter_temporal(extent=("2021-03", "2021-06")) + + # The above are shorthand for: + cube = cube.filter_temporal("2021-03-01", "2021-06-01") + +.. _single-string-temporal-extents: + +Single string temporal extents +`````````````````````````````` + +Apart from rounding down year or month strings, the openEO Python Client Library provides an additional +``extent`` handling feature in methods like +:py:meth:`Connection.load_collection(temporal_extent=...) ` +and :py:meth:`DataCube.filter_temporal(extent=...) `. +Normally, the ``extent`` argument should be a list or tuple containing start and end date, +but if a single string is given, representing a year, month (or day) period, +it is automatically expanded to the appropriate interval, +again following the :ref:`left-closed interval principle `. +For example:: + + extent="2022" -> extent=("2022-01-01", "2023-01-01") + extent="2022-05" -> extent=("2022-05-01", "2022-06-01") + extent="2022-05-17" -> extent=("2022-05-17", "2022-05-18") + + +The following snippet shows some examples of equivalent calls: + +.. code-block:: python + + connection.load_collection(temporal_extent="2022", ...) + # The above is shorthand for: + connection.load_collection(temporal_extent=("2022-01-01", "2023-01-01"), ...) + + + cube = cube.filter_temporal(extent="2021-03") + # The above is shorthand for: + cube = cube.filter_temporal(extent=("2021-03-01", "2021-04-01")) + + +Filter on collection properties +=============================== + +Although openEO presents data in a data cube, a lot of collections are still backed by a product based catalog. This +allows filtering on properties of that catalog. 
+ +A very common use case is to pre-filter Sentinel-2 products on cloud cover. +This avoids loading clouded data unnecessarily and increases performance. +:py:meth:`Connection.load_collection() ` provides +a dedicated ``max_cloud_cover`` argument (shortcut for the ``eo:cloud_cover`` property) for that: + +.. code-block:: python + :emphasize-lines: 4 + + connection.load_collection( + "SENTINEL2_L2A", + ..., + max_cloud_cover=80, + ) + +For more general cases, you can use the ``properties`` argument to filter on any collection property. +For example, to filter on the relative orbit number of SAR data: + +.. code-block:: python + :emphasize-lines: 4-6 + + connection.load_collection( + "SENTINEL1_GRD", + ..., + properties={ + "relativeOrbitNumber": lambda x: x==116 + }, + ) + +Version 0.26.0 of the openEO Python Client Library adds +:py:func:`~openeo.rest.graph_building.collection_property` +which makes defining such property filters more user-friendly by avoiding the ``lambda`` construct: + +.. code-block:: python + :emphasize-lines: 6-8 + + import openeo + + connection.load_collection( + "SENTINEL1_GRD", + ..., + properties=[ + openeo.collection_property("relativeOrbitNumber") == 116, + ], + ) + +Note that property names follow STAC metadata conventions, but some collections can have different names. + +Property filters in openEO are also specified by small process graphs, that allow the use of the same generic processes +defined by openEO. This is the 'lambda' process that you see in the property dictionary. Do note that not all processes +make sense for product filtering, and can not always be properly translated into the query language of the catalog. +Hence, some experimentation may be needed to find a filter that works. + +One important caveat in this example is that 'relativeOrbitNumber' is a catalog specific property name. 
Meaning that +different archives may choose a different name for a given property, and the properties that are available can depend +on the collection and the catalog that is used by it. This is not a problem caused by openEO, but by the limited +standardization between catalogs of EO data. + + +Handling large vector data sets +=============================== + +For simple use cases, it is common to directly embed geometries (vector data) in your openEO process graph. +Unfortunately, with large vector data sets this leads to very large process graphs +and you might hit certain limits, +resulting in HTTP errors like ``413 Request Entity Too Large`` or ``413 Payload Too Large``. + +This problem can be circumvented by first uploading your vector data to a file sharing service +(like Google Drive, DropBox, GitHub, ...) +and use its public URL in the process graph instead +through :py:meth:`Connection.vectorcube_from_paths `. +For example, as follows: + +.. code-block:: python + + # Load vector data from URL + url = "https://github.com/Open-EO/openeo-python-client/raw/master/tests/data/example_aoi.pq" + parcels = connection.vectorcube_from_paths([url], format="parquet") + + # Use the parcel vector data, for example to do aggregation. + cube = connection.load_collection( + "SENTINEL2_L2A", + bands=["B04", "B03", "B02"], + temporal_extent=["2021-05-12", "2021-06-01"], + ) + aggregations = cube.aggregate_spatial( + geometries=parcels, + reducer="mean", + ) + +Note that while openEO back-ends typically support multiple vector formats, like GeoJSON and GeoParquet, +it is usually recommended to use a compact format like GeoParquet, instead of GeoJSON. The list of supported formats +is also advertised by the backend, and can be queried with +:py:meth:`Connection.list_file_formats `. 
diff --git a/_sources/datacube_construction.rst.txt b/_sources/datacube_construction.rst.txt new file mode 100644 index 000000000..5422a7a87 --- /dev/null +++ b/_sources/datacube_construction.rst.txt @@ -0,0 +1,250 @@ + +======================= +DataCube construction +======================= + + +The ``load_collection`` process +================================= + +The most straightforward way to start building your openEO data cube is through the ``load_collection`` process. +As mentioned earlier, this is provided by the +:py:meth:`~openeo.rest.connection.Connection.load_collection` method +on a :py:class:`~openeo.rest.connection.Connection` object, +which produces a :py:class:`~openeo.rest.datacube.DataCube` instance. +For example:: + + cube = connection.load_collection("SENTINEL2_TOC") + +While this should cover the majority of use cases, +there are some cases +where one wants to build a :py:class:`~openeo.rest.datacube.DataCube` object +from something else or something more than just a simple ``load_collection`` process. + + + +.. _datacube_from_process: + +Construct DataCube from process +================================= + +Through :ref:`user-defined processes ` one can encapsulate +one or more ``load_collection`` processes and additional processing steps in a single +reusable user-defined process. +For example, imagine a user-defined process "masked_s2" +that loads an openEO collection "SENTINEL2_TOC" and applies some kind of cloud masking. +The implementation details of the cloud masking are not important here, +but let's assume there is a parameter "dilation" to fine-tune the cloud mask. +Also note that the collection id "SENTINEL2_TOC" is hardcoded in the user-defined process. 
+ +We can now construct a data cube from this user-defined process +with :py:meth:`~openeo.rest.connection.Connection.datacube_from_process` +as follows:: + + cube = connection.datacube_from_process("masked_s2", dilation=10) + + # Further processing of the cube: + cube = cube.filter_temporal("2020-09-01", "2020-09-10") + + +Note that :py:meth:`~openeo.rest.connection.Connection.datacube_from_process` can be +used with all kind of processes, not only user-defined processes. +For example, while this is not exactly a real EO data use case, +it will produce a valid openEO process graph that can be executed:: + + >>> cube = connection.datacube_from_process("mean", data=[2, 3, 5, 8]) + >>> cube.execute() + 4.5 + + + +.. _datacube_from_json: + +Construct a DataCube from JSON +=============================== + +openEO process graphs are typically stored and published in JSON format. +Most notably, user-defined processes are transferred between openEO client +and back-end in a JSON structure roughly like in this example:: + + { + "id": "evi", + "parameters": [ + {"name": "red", "schema": {"type": "number"}}, + {"name": "blue", "schema": {"type": "number"}}, + ... + ], + "process_graph": { + "sub": {"process_id": "subtract", "arguments": {"x": {"from_parameter": "nir"}, "y": {"from_parameter": "red"}}}, + "p1": {"process_id": "multiply", "arguments": {"x": 6, "y": {"from_parameter": "red"}}}, + "div": {"process_id": "divide", "arguments": {"x": {"from_node": "sub"}, "y": {"from_node": "sum"}}, + ... + + +It is possible to construct a :py:class:`~openeo.rest.datacube.DataCube` object that corresponds with this +process graph with the :py:meth:`Connection.datacube_from_json ` method. 
+It can be given one of: + + - a raw JSON string, + - a path to a local JSON file, + - a URL that points to a JSON resource + +The JSON structure should be one of: + + - a mapping (dictionary) like the example above with at least a ``"process_graph"`` item, + and optionally a ``"parameters"`` item. + - a mapping (dictionary) with ``{"process_id": ...}`` items + + +Some examples +--------------- + +Load a :py:class:`~openeo.rest.datacube.DataCube` from a raw JSON string, containing a +simple "flat graph" representation: + +.. code-block:: python + + raw_json = '''{ + "lc": {"process_id": "load_collection", "arguments": {"id": "SENTINEL2_TOC"}}, + "ak": {"process_id": "apply_kernel", "arguments": {"data": {"from_node": "lc"}, "kernel": [[1,2,1],[2,5,2],[1,2,1]]}, "result": true} + }''' + cube = connection.datacube_from_json(raw_json) + +Load from a raw JSON string, containing a mapping with "process_graph" and "parameters": + +.. code-block:: python + + raw_json = '''{ + "parameters": [ + {"name": "kernel", "schema": {"type": "array"}, "default": [[1,2,1], [2,5,2], [1,2,1]]} + ], + "process_graph": { + "lc": {"process_id": "load_collection", "arguments": {"id": "SENTINEL2_TOC"}}, + "ak": {"process_id": "apply_kernel", "arguments": {"data": {"from_node": "lc"}, "kernel": {"from_parameter": "kernel"}}, "result": true} + } + }''' + cube = connection.datacube_from_json(raw_json) + +Load directly from a local file or URL containing these kinds of JSON representations: + +.. code-block:: python + + # Local file + cube = connection.datacube_from_json("path/to/my_udp.json") + + # URL + cube = connection.datacube_from_json("https://example.com/my_udp.json") + + +Parameterization +----------------- + +When the process graph uses parameters, you must specify the desired parameter values +at the time of calling :py:meth:`Connection.datacube_from_json `. + +For example, take this simple toy example of a process graph that takes the sum of 5 and a parameter "increment": + +.. 
code-block:: python + + raw_json = '''{"add": { + "process_id": "add", + "arguments": {"x": 5, "y": {"from_parameter": "increment"}}, + "result": true + }}''' + +Trying to build a :py:class:`~openeo.rest.datacube.DataCube` from it without specifying parameter values will fail +like this: + +.. code-block:: pycon + + >>> cube = connection.datacube_from_json(raw_json) + ProcessGraphVisitException: No substitution value for parameter 'increment'. + +Instead, specify the parameter value: + +.. code-block:: pycon + :emphasize-lines: 3 + + >>> cube = connection.datacube_from_json( + ... raw_json, + ... parameters={"increment": 4}, + ... ) + >>> cube.execute() + 9 + + +Parameters can also be defined with default values, which will be used when they are not specified +in the :py:meth:`Connection.datacube_from_json ` call: + +.. code-block:: python + + raw_json = '''{ + "parameters": [ + {"name": "increment", "schema": {"type": "number"}, "default": 100} + ], + "process_graph": { + "add": {"process_id": "add", "arguments": {"x": 5, "y": {"from_parameter": "increment"}}, "result": true} + } + }''' + + cube = connection.datacube_from_json(raw_json) + result = cube.execute() + # result will be 105 + + +Re-parameterization +``````````````````` + +TODO + + + +.. _multi-result-process-graphs: +Building process graphs with multiple result nodes +=================================================== + +.. note:: + Multi-result support is added in version 0.35.0 + +Most openEO use cases are just about building a single result data cube, +which is readily covered in the openEO Python client library through classes like +:py:class:`~openeo.rest.datacube.DataCube` and :py:class:`~openeo.rest.vectorcube.VectorCube`. +It is straightforward to create a batch job from these, or execute/download them synchronously. + +The openEO API also allows multiple result nodes in a single process graph, +for example to persist intermediate results or produce results in different output formats. 
+To support this, the openEO Python client library provides the :py:class:`~openeo.rest.multiresult.MultiResult` class, +which allows grouping multiple :py:class:`~openeo.rest.datacube.DataCube` and :py:class:`~openeo.rest.vectorcube.VectorCube` objects +in a single entity that can be used to create or run batch jobs. For example: + + +.. code-block:: python + + from openeo import MultiResult + + cube1 = ... + cube2 = ... + multi_result = MultiResult([cube1, cube2]) + job = multi_result.create_job() + + +Moreover, it is not necessary to explicitly create such a +:py:class:`~openeo.rest.multiresult.MultiResult` object, +as the :py:meth:`Connection.create_job() ` method +directly supports passing multiple data cube objects in a list, +which will be automatically grouped as a multi-result: + +.. code-block:: python + + cube1 = ... + cube2 = ... + job = connection.create_job([cube1, cube2]) + + +.. important:: + + Only a single :py:class:`~openeo.rest.connection.Connection` can be in play + when grouping multiple results like this. + As everything is to be merged in a single process graph + to be sent to a single backend, + it is not possible to mix cubes created from different connections. diff --git a/_sources/development.rst.txt b/_sources/development.rst.txt new file mode 100644 index 000000000..8452c9a97 --- /dev/null +++ b/_sources/development.rst.txt @@ -0,0 +1,422 @@ +.. _development-and-maintenance: + +########################### +Development and maintenance +########################### + + +For development on the ``openeo`` package itself, +it is recommended to install a local git checkout of the project +in development mode (``-e``) +with additional development related dependencies (``[dev]``) +like this:: + + pip install -e .[dev] + +If you are on Windows and experience problems installing this way, you can find some solutions in section `Development Installation on Windows`_. 
+ +Running the unit tests +====================== + +The test suite of the openEO Python Client leverages +the nice `pytest `_ framework. +It is installed automatically when installing the openEO Python Client +with the ``[dev]`` extra as shown above. +Running the whole test suite is as simple as executing:: + + pytest + +There are a ton of command line options for fine-tuning +(e.g. select a subset of tests, how results should be reported, ...). +Run ``pytest -h`` for a quick overview +or check the `pytest `_ documentation for more information. + +For example:: + + # Skip tests that are marked as slow + pytest -m "not slow" + + +Building the documentation +========================== + +Building the documentation requires `Sphinx `_ +and some plugins +(which are installed automatically as part of the ``[dev]`` install). + +Quick and easy +--------------- + +The easiest way to build the documentation is working from the ``docs`` folder +and using the ``Makefile``: + +.. code-block:: shell + + # From `docs` folder + make html + +(assumes you have ``make`` available, if not: use ``python -msphinx -M html . _build``.) + +This will generate the docs in HTML format under ``docs/_build/html/``. +Open the HTML files manually, +or use Python's built-in web server to host them locally, e.g.: + +.. code-block:: shell + + # From `docs` folder + python -m http.server 8000 + +Then, visit http://127.0.0.1:8000/_build/html/ in your browser + + +Like a Pro +------------ + +When doing larger documentation work, it can be tedious to manually rebuild the docs +and refresh your browser to check the result. +Instead, use `sphinx-autobuild `_ +to automatically rebuild on documentation changes and live-reload it in your browser. +After installation (``pip install sphinx-autobuild`` in your development environment), +just run + +.. code-block:: shell + + # From project root + sphinx-autobuild docs/ --watch openeo/ docs/_build/html/ + +and then visit http://127.0.0.1:8000 . 
+When you change (and save) documentation source files, your browser should now +automatically refresh and show the newly built docs. Just like magic. + + +Contributing code +================== + +User contributions (such as bug fixes and new features, both in source code and documentation) +are greatly appreciated and welcome. + + +Pull requests +-------------- + +We use a traditional `GitHub Pull Request (PR) `_ workflow +for user contributions, which roughly follows these steps: + +- Create a personal fork of https://github.com/Open-EO/openeo-python-client + (unless you already have push permissions to an existing fork or the original repo) +- Preferably: work on your contribution in a new feature branch +- Push your feature branch to your fork and create a pull request +- The pull request is the place for review, discussion and fine-tuning of your work +- Once your pull request is in good shape it will be merged by a maintainer + + +.. _precommit: + +Pre-commit for basic code quality checks +------------------------------------------ + +We started using the `pre-commit `_ tool +for basic fine-tuning of code style and quality in new contributions. +It's currently not enforced, but **enabling pre-commit is recommended** and appreciated +when contributing code. + +.. note:: + + Note that the whole repository does not fully follow all code styles rules at the moment. + We're just gradually introducing it, piggybacking on new contributions and commits. + + +Pre-commit set up +"""""""""""""""""" + +- Install the general ``pre-commit`` command line tool: + + - The simplest option is to install it directly in the *virtual environment* + you are using for openEO Python client development (e.g. ``pip install pre-commit``). + - You can also install it *globally* on your system + (e.g. using `pipx `_, + `uv tool `_, + conda, homebrew, ...) + so you can use it across different projects. 
+ +- Install the project specific git hook scripts by running this in the root of your local git clone: + + .. code-block:: console + + pre-commit install + + This will automatically install additional scripts and tools in a sandbox + to run the various checks defined in the project's ``.pre-commit-config.yaml`` configuration file. + +Pre-commit usage +""""""""""""""""" + +When you commit new changes, the freshly installed pre-commit hook +will now automatically run each of the configured linters/formatters/... +Some of these just flag issues (e.g. invalid JSON files) +while others even automatically fix problems (e.g. clean up excessive whitespace). + +If there is some kind of violation, the commit will be blocked. +Address these problems and try to commit again. + +.. attention:: + + Some pre-commit tools directly *edit* your files (e.g. formatting tweaks) + instead of just flagging issues. + This might feel intrusive at first, but once you get the hang of it, + it should allow to streamline your workflow. + + In particular, it is recommended to use the *staging* feature of git to prepare your commit. + Pre-commit's proposed changes are not staged automatically, + so you can more easily keep them separate and review. + +.. tip:: + + You can temporarily disable pre-commit for these rare cases + where you intentionally want to commit violating code style, + e.g. through ``git commit`` command line option ``-n``/``--no-verify``. + + + + +Creating a release +================== + +This section describes the procedure to create +properly versioned releases of the ``openeo`` package +that can be downloaded by end users (e.g. through ``pip`` from pypi.org) +and depended on by other projects. 
+ +The releases will end up on: + +- PyPi: `https://pypi.org/project/openeo `_ +- VITO Artifactory: `https://artifactory.vgt.vito.be/api/pypi/python-openeo/simple/openeo/ `_ +- GitHub: `https://github.com/Open-EO/openeo-python-client/releases `_ + +Prerequisites +------------- + +- You have permissions to push branches and tags and maintain releases on + the `openeo-python-client project on GitHub `_. +- You have permissions to upload releases to the + `openeo project on pypi.org `_ +- The Python virtual environment you work in has the latest versions + of the ``twine`` package installed. + If you plan to build the wheel yourself (instead of letting GitHub or Jenkins do this), + you also need recent enough versions of the ``setuptools`` and ``wheel`` packages. + +Important files +--------------- + +``setup.py`` + describes the metadata of the package, + like package name ``openeo`` and version + (which is extracted from ``openeo/_version.py``). + +``openeo/_version.py`` + defines the version of the package. + During general **development**, this version string should contain + a `pre-release `_ + segment (e.g. ``a1`` for alpha releases, ``b1`` for beta releases, etc) + to avoid collision with final releases. For example:: + + __version__ = '0.8.0a1' + + As discussed below, this pre-release suffix should + only be removed during the release procedure + and restored when bumping the version after the release procedure. + +``CHANGELOG.md`` + keeps track of important changes associated with each release. + It follows the `Keep a Changelog `_ convention + and should be properly updated with each bug fix, feature addition/removal, ... + under the ``Unreleased`` section during development. + +Procedure +--------- + +These are the steps to create and publish a new release of the ``openeo`` package. 
+To avoid the confusion with ad-hoc injection of some abstract version placeholder +that has to be replaced properly, +we will use a concrete version ``0.8.0`` in the examples below. + +0. Make sure you are working on **latest master branch**, + without uncommitted changes and all tests are properly passing. + +#. Create release commit: + + A. **Drop the pre-release suffix** from the version string in ``openeo/_version.py`` + so that it is just a "final" semantic versioning string, e.g. ``0.8.0`` + + B. **Update CHANGELOG.md**: rename the "Unreleased" section title + to contain version and date, e.g.:: + + ## [0.8.0] - 2020-12-15 + + remove empty subsections + and start a new "Unreleased" section above it, like:: + + ## [Unreleased] + + ### Added + + ### Changed + + ### Removed + + ### Fixed + + + C. **Commit** these changes in git with a commit message like ``Release 0.8.0`` + and **push** to GitHub:: + + git add openeo/_version.py CHANGELOG.md + git commit -m 'Release 0.8.0' + git push origin master + +#. Optional, but recommended: wait for **VITO Jenkins** to build this updated master + (trigger it manually if necessary), + so that a build of a final, non-alpha release ``0.8.0`` + is properly uploaded to **VITO artifactory**. + +#. Create release on `PyPI `_: + + A. **Obtain a wheel archive** of the package, with one of these approaches: + + - *Preferably, the path of least surprise*: build wheel through GitHub Actions. + Go to workflow `"Build wheel" `_, + manually trigger a build with "Run workflow" button, wait for it to finish successfully, + download generated ``artifact.zip``, and finally: unzip it to obtain ``openeo-0.8.0-py3-none-any.whl`` + + - *Or, if you know what you are doing* and you're sure you have a clean + local checkout, you can also build it locally:: + + python setup.py bdist_wheel + + This should create ``dist/openeo-0.8.0-py3-none-any.whl`` + + B. 
**Upload** this wheel to `openeo project on PyPI `_:: + + python -m twine upload openeo-0.8.0-py3-none-any.whl + + Check the `release history on PyPI `_ + to verify the twine upload. + Another way to verify that the freshly created release installs + is using docker to do a quick install-and-burn, + for example as follows (check the installed version in pip's output):: + + docker run --rm -it python python -m pip install --no-deps openeo + +#. Create a **git version tag** and push it to GitHub:: + + git tag v0.8.0 + git push origin v0.8.0 + +#. Create a **release in GitHub**: + Go to `https://github.com/Open-EO/openeo-python-client/releases/new `_, + Enter ``v0.8.0`` under "tag", + enter title: ``openEO Python Client v0.8.0``, + use the corresponding ``CHANGELOG.md`` section as description + and publish it + (no need to attach binaries). + +#. **Bump the version** in ``openeo/_version.py``, (usually the "minor" level) + and append a pre-release "a1" suffix again, for example:: + + __version__ = '0.9.0a1' + + Commit this (e.g. with message ``_version.py: bump to 0.9.0a1``) + and push to GitHub. + +#. Update `conda-forge package `_ too + (requires conda recipe maintainer role). + Normally, the "regro-cf-autotick-bot" will create a `pull request `_. + If it builds fine, merge it. + If not, fix the issue + (typically in `recipe/meta.yaml `_) + and merge. + +#. Optionally: make a post about the new release + on the `openEO Platform Forum `_ + or the `CDSE Forum `_. 
+ +Verification +""""""""""""" + +The new release should now be available/listed at: + +- `https://pypi.org/project/openeo/#history `_ +- `https://github.com/Open-EO/openeo-python-client/releases `_ + +Here is a bash (subshell) oneliner to verify that the PyPI release works properly:: + + ( + cd /tmp &&\ + python -m venv venv-openeo &&\ + source venv-openeo/bin/activate &&\ + pip install -U openeo &&\ + python -c "import openeo;print(openeo);print(openeo.__version__)" + ) + +It tries to install the latest version of the ``openeo`` package in a temporary virtual env, +import it and print the package version. + + +Development Installation on Windows +=================================== + +Normally you can install the client the same way on Windows as on Linux, like so: + +.. code-block:: console + + pip install -e .[dev] + +Alternative development installation +------------------------------------- + +The standard pure-``pip`` based installation should work with the most recent code. +However, in the past we sometimes had issues with this procedure. +Should you experience problems, consider using an alternative conda-based installation procedure: + +1. Create and activate a new conda environment for developing the openeo-python-client. + For example: + + .. code-block:: console + + conda create -n openeopyclient + conda activate openeopyclient + +2. In that conda environment, install only the dependencies of ``openeo`` via conda, + but not the ``openeo`` package itself. + + .. code-block:: console + + # Install openeo dependencies (from the conda-forge channel) + conda install --only-deps -c conda-forge openeo + +3. Do a ``pip install`` from the project root in *editable mode* (``pip -e``): + + .. code-block:: console + + pip install -e .[dev] + + + +Update of generated files +========================== + +Some parts of the openEO Python Client Library source code are +generated/compiled from upstream sources (e.g. official openEO specifications). 
+Because updates are not often required, +it's just a semi-manual procedure (to run from the project root): + +.. code-block:: console + + # Update the sub-repositories (like git submodules, but optional) + python specs/update-subrepos.py + + # Update `openeo/processes.py` from specifications in openeo-processes repository + python openeo/internal/processes/generator.py specs/openeo-processes specs/openeo-processes/proposals --output openeo/processes.py + + # Update the openEO process mapping documentation page + python docs/process_mapping.py > docs/process_mapping.rst diff --git a/_sources/index.rst.txt b/_sources/index.rst.txt new file mode 100644 index 000000000..b2c1ba643 --- /dev/null +++ b/_sources/index.rst.txt @@ -0,0 +1,75 @@ + +openEO Python Client +===================== + +.. image:: https://img.shields.io/badge/Status-Stable-yellow.svg + +Welcome to the documentation of ``openeo``, +the official Python client library for interacting with **openEO** back-ends +to process remote sensing and Earth observation data. +It provides a **Pythonic** interface for the openEO API, +supporting data/process discovery, process graph building, +batch job management and much more. + + +Usage example +------------- + +A simple example, to give a feel of using this library: + +.. code-block:: python + + import openeo + + # Connect to openEO back-end. + connection = openeo.connect("openeo.vito.be").authenticate_oidc() + + # Load data cube from TERRASCOPE_S2_NDVI_V2 collection. + cube = connection.load_collection( + "TERRASCOPE_S2_NDVI_V2", + spatial_extent={"west": 5.05, "south": 51.21, "east": 5.1, "north": 51.23}, + temporal_extent=["2022-05-01", "2022-05-30"], + bands=["NDVI_10M"], + ) + # Rescale digital number to physical values and take temporal maximum. + cube = cube.apply(lambda x: 0.004 * x - 0.08).max_time() + + cube.download("ndvi-max.tiff") + + +.. image:: _static/images/welcome.png + + +Table of contents +----------------- + +.. 
toctree:: + :maxdepth: 2 + + self + installation + basics + data_access + processes + batch_jobs + udp + auth + udf + datacube_construction + machine_learning + configuration + cookbook/index + api + api-processes + process_mapping + development + best_practices + changelog + + +Indices and tables +------------------ + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/_sources/installation.rst.txt b/_sources/installation.rst.txt new file mode 100644 index 000000000..68df3f4af --- /dev/null +++ b/_sources/installation.rst.txt @@ -0,0 +1,126 @@ +************* +Installation +************* + + +It is an explicit goal of the openEO Python client library to be as easy to install as possible, +unlocking the openEO ecosystem to a broad audience. +The package is a pure Python implementation and its dependencies are carefully considered (in number and complexity). + + +Basic install +============= + +It is recommended to work in some kind of *virtual environment* (``venv``, ``conda``, ...) +to avoid polluting the base install of Python on your operating system +or introducing conflicts with other applications. +How you organize your virtual environments heavily depends on your use case and workflow, +and is out of scope of this documentation. + + +Installation with ``pip`` +------------------------- + +The openEO Python client library is available from `PyPI `_ +and can be easily installed with a tool like ``pip``, for example: + +.. code-block:: console + + $ pip install openeo + +To upgrade the package to the latest release: + +.. code-block:: console + + $ pip install --upgrade openeo + + +Installation with Conda +------------------------ + +The openEO Python client library is available on `conda-forge `_ +and can be easily installed in a conda environment, for example: + +.. 
code-block:: console + + $ conda install -c conda-forge openeo + + +Verifying and troubleshooting +----------------------------- + +You can check if the installation worked properly +by trying to import the ``openeo`` package in a Python script, interactive shell or notebook: + +.. code-block:: python + + import openeo + + print(openeo.client_version()) + +This should print the installed version of the ``openeo`` package. + +If the first line gives an error like ``ModuleNotFoundError: No module named 'openeo'``, +some troubleshooting tips: + +- Restart your Python shell or notebook (or start a fresh one). +- Double check that the installation went well, + e.g. try re-installing and keep an eye out for error/warning messages. +- Make sure that you are working in the same (virtual) environment you installed the package in. + +If you still have troubles installing and importing ``openeo``, +feel free to reach out in the `community forum `_ +or the `project's issue tracker `_. +Try to describe your setup in enough detail: your operating system, +which virtual environment system you use, +the installation tool (``pip``, ``conda`` or something else), ... + + + +.. _installation-optional-dependencies: + +Optional dependencies +====================== + +Depending on your use case, you might also want to install some additional libraries. +For example: + +- ``netCDF4`` or ``h5netcdf`` for loading and writing NetCDF files (e.g. integrated in ``xarray.load_dataset()``) +- ``matplotlib`` for visualisation (e.g. integrated plot functionality in ``xarray`` ) +- ``pyarrow`` for (read/write) support of Parquet files + (e.g. with :py:class:`~openeo.extra.job_management.MultiBackendJobManager`) +- ``rioxarray`` for GeoTIFF support in the assert helpers from ``openeo.testing.results`` +- ``geopandas`` for working with dataframes with geospatial support, + (e.g. 
with :py:class:`~openeo.extra.job_management.MultiBackendJobManager`) + + +Enabling additional features +---------------------------- + +To use the on-demand preview feature and other Jupyter-enabled features, you need to install the necessary dependencies. + +.. code-block:: console + + $ pip install openeo[jupyter] + + +Source or development install +============================== + +If you closely track the development of the ``openeo`` package at +`github.com/Open-EO/openeo-python-client `_ +and want to work with unreleased features or contribute to the development of the package, +you can install it as follows from the root of a git source checkout: + +.. code-block:: console + + $ pip install -e .[dev] + +The ``-e`` option enables "development mode", which makes sure that changes you make to the source code +happen directly on the installed package, so that you don't have to re-install the package each time +you make a change. + +The ``[dev]`` (a so-called "extra") installs additional development related dependencies, +for example to run the unit tests. + +You can also find more information about installation for development on the :ref:`development-and-maintenance` page. diff --git a/_sources/machine_learning.rst.txt b/_sources/machine_learning.rst.txt new file mode 100644 index 000000000..69f315e1b --- /dev/null +++ b/_sources/machine_learning.rst.txt @@ -0,0 +1,118 @@ +****************** +Machine Learning +****************** + +.. warning:: + This API and documentation is experimental, + under heavy development and subject to change. + + +.. 
versionadded:: 0.10.0 + + +Random Forest based Classification and Regression +=================================================== + +openEO defines a couple of processes for *random forest* based machine learning +for Earth Observation applications: + +- ``fit_class_random_forest`` for training a random forest based classification model +- ``fit_regr_random_forest`` for training a random forest based regression model +- ``predict_random_forest`` for inference/prediction + +The openEO Python Client library provides the necessary functionality to set up +and execute training and inference workflows. + +Training +--------- + +Let's focus on training a classification model, where we try to predict +a class like a land cover type or crop type based on predictors +we derive from EO data. +For example, assume we have a GeoJSON FeatureCollection +of sample points and a corresponding classification target value as follows:: + + feature_collection = {"type": "FeatureCollection", "features": [ + { + "type": "Feature", + "properties": {"id": "b3dw-wd23", "target": 3}, + "geometry": {"type": "Point", "coordinates": [3.4, 51.1]} + }, + { + "type": "Feature", + "properties": {"id": "r8dh-3jkd", "target": 5}, + "geometry": {"type": "Point", "coordinates": [3.6, 51.2]} + }, + ... + + +.. note:: + Confusingly, the concept "feature" has somewhat conflicting meanings + for different audiences. GIS/EO people use "feature" to refer to the "rows" + in this feature collection. + For the machine learning community however, the properties (the "columns") + are the features. + To avoid confusion in this discussion we will avoid the term "feature" + and instead use "sample point" for the former and "predictor" for the latter. + + +We first build a datacube of "predictor" bands. 
+For simplicity, we will just use the raw B02/B03/B04 band values here +and use the temporal mean to eliminate the time dimension:: + + cube = connection.load_collection( + "SENTINEL2", + temporal_extent=[start, end], + spatial_extent=bbox, + bands=["B02", "B03", "B04"] + ) + cube = cube.reduce_dimension(dimension="t", reducer="mean") + +We now use ``aggregate_spatial`` to sample this *raster data cube* at the sample points +and get a *vector cube* where we have the temporal mean of the B02/B03/B04 bands as predictor values:: + + predictors = cube.aggregate_spatial(feature_collection, reducer="mean") + +We can now train a *Random Forest* model by calling the +:py:meth:`~openeo.rest.vectorcube.VectorCube.fit_class_random_forest` method on the predictor vector cube +and passing the original target class data:: + + model = predictors.fit_class_random_forest( + target=feature_collection, + ) + # Save the model as a batch job result asset + # so that we can load it in another job. + model = model.save_ml_model() + +Finally execute this whole training flow as a batch job:: + + training_job = model.create_job() + training_job.start_and_wait() + + +Inference +---------- + +When the batch job finishes successfully, the trained model can then be used +with the ``predict_random_forest`` process on the raster data cube +(or another cube with the same band structure) to classify all the pixels. 
+ +Technically, the openEO ``predict_random_forest`` process has to be used as a reducer function +inside a ``reduce_dimension`` call, but the openEO Python client library makes it +a bit easier by providing a :py:meth:`~openeo.rest.datacube.DataCube.predict_random_forest` method +directly on the :py:class:`~openeo.rest.datacube.DataCube` class, so that you can just do:: + + predicted = cube.predict_random_forest( + model=training_job.job_id, + dimension="bands" + ) + + predicted.download("predicted.GTiff") + + +We specified the model here by batch job id (string), +but it can also be specified in other ways: +as :py:class:`~openeo.rest.job.BatchJob` instance, +as URL to the corresponding STAC Item that implements the `ml-model` extension, +or as :py:class:`~openeo.rest.mlmodel.MlModel` instance (e.g. loaded through +:py:meth:`~openeo.rest.connection.Connection.load_ml_model`). diff --git a/_sources/process_mapping.rst.txt b/_sources/process_mapping.rst.txt new file mode 100644 index 000000000..60519285c --- /dev/null +++ b/_sources/process_mapping.rst.txt @@ -0,0 +1,332 @@ + +.. + !Warning! This is an auto-generated file. + Do not edit directly. + Generated from: ['docs/process_mapping.py'] + +.. _openeo_process_mapping: + +openEO Process Mapping +####################### + +The table below maps openEO processes to the corresponding +method or function in the openEO Python Client Library. + +.. 
list-table:: + :header-rows: 1 + + * - openEO process + - openEO Python Client Method + + * - `absolute `_ + - :py:meth:`ProcessBuilder.absolute() `, :py:meth:`absolute() ` + * - `add `_ + - :py:meth:`ProcessBuilder.__add__() `, :py:meth:`ProcessBuilder.__radd__() `, :py:meth:`ProcessBuilder.add() `, :py:meth:`add() `, :py:meth:`DataCube.add() `, :py:meth:`DataCube.__add__() `, :py:meth:`DataCube.__radd__() ` + * - `add_dimension `_ + - :py:meth:`ProcessBuilder.add_dimension() `, :py:meth:`add_dimension() `, :py:meth:`DataCube.add_dimension() ` + * - `aggregate_spatial `_ + - :py:meth:`ProcessBuilder.aggregate_spatial() `, :py:meth:`aggregate_spatial() `, :py:meth:`DataCube.aggregate_spatial() ` + * - `aggregate_spatial_window `_ + - :py:meth:`ProcessBuilder.aggregate_spatial_window() `, :py:meth:`aggregate_spatial_window() `, :py:meth:`DataCube.aggregate_spatial_window() ` + * - `aggregate_temporal `_ + - :py:meth:`ProcessBuilder.aggregate_temporal() `, :py:meth:`aggregate_temporal() `, :py:meth:`DataCube.aggregate_temporal() ` + * - `aggregate_temporal_period `_ + - :py:meth:`ProcessBuilder.aggregate_temporal_period() `, :py:meth:`aggregate_temporal_period() `, :py:meth:`DataCube.aggregate_temporal_period() ` + * - `all `_ + - :py:meth:`ProcessBuilder.all() `, :py:meth:`all() ` + * - `and `_ + - :py:meth:`DataCube.logical_and() `, :py:meth:`DataCube.__and__() ` + * - `and_ `_ + - :py:meth:`ProcessBuilder.and_() `, :py:meth:`and_() ` + * - `anomaly `_ + - :py:meth:`ProcessBuilder.anomaly() `, :py:meth:`anomaly() ` + * - `any `_ + - :py:meth:`ProcessBuilder.any() `, :py:meth:`any() ` + * - `apply `_ + - :py:meth:`ProcessBuilder.apply() `, :py:meth:`apply() `, :py:meth:`DataCube.apply() ` + * - `apply_dimension `_ + - :py:meth:`ProcessBuilder.apply_dimension() `, :py:meth:`apply_dimension() `, :py:meth:`DataCube.apply_dimension() ` + * - `apply_kernel `_ + - :py:meth:`ProcessBuilder.apply_kernel() `, :py:meth:`apply_kernel() `, :py:meth:`DataCube.apply_kernel() ` + 
* - `apply_neighborhood `_ + - :py:meth:`ProcessBuilder.apply_neighborhood() `, :py:meth:`apply_neighborhood() `, :py:meth:`DataCube.apply_neighborhood() ` + * - `arccos `_ + - :py:meth:`ProcessBuilder.arccos() `, :py:meth:`arccos() ` + * - `arcosh `_ + - :py:meth:`ProcessBuilder.arcosh() `, :py:meth:`arcosh() ` + * - `arcsin `_ + - :py:meth:`ProcessBuilder.arcsin() `, :py:meth:`arcsin() ` + * - `arctan `_ + - :py:meth:`ProcessBuilder.arctan() `, :py:meth:`arctan() ` + * - `arctan2 `_ + - :py:meth:`ProcessBuilder.arctan2() `, :py:meth:`arctan2() ` + * - `ard_normalized_radar_backscatter `_ + - :py:meth:`ProcessBuilder.ard_normalized_radar_backscatter() `, :py:meth:`ard_normalized_radar_backscatter() `, :py:meth:`DataCube.ard_normalized_radar_backscatter() ` + * - `ard_surface_reflectance `_ + - :py:meth:`ProcessBuilder.ard_surface_reflectance() `, :py:meth:`ard_surface_reflectance() `, :py:meth:`DataCube.ard_surface_reflectance() ` + * - `array_append `_ + - :py:meth:`ProcessBuilder.array_append() `, :py:meth:`array_append() ` + * - `array_apply `_ + - :py:meth:`ProcessBuilder.array_apply() `, :py:meth:`array_apply() ` + * - `array_concat `_ + - :py:meth:`ProcessBuilder.array_concat() `, :py:meth:`array_concat() ` + * - `array_contains `_ + - :py:meth:`ProcessBuilder.array_contains() `, :py:meth:`array_contains() ` + * - `array_create `_ + - :py:meth:`ProcessBuilder.array_create() `, :py:meth:`array_create() ` + * - `array_create_labeled `_ + - :py:meth:`ProcessBuilder.array_create_labeled() `, :py:meth:`array_create_labeled() ` + * - `array_element `_ + - :py:meth:`ProcessBuilder.__getitem__() `, :py:meth:`ProcessBuilder.array_element() `, :py:meth:`array_element() ` + * - `array_filter `_ + - :py:meth:`ProcessBuilder.array_filter() `, :py:meth:`array_filter() ` + * - `array_find `_ + - :py:meth:`ProcessBuilder.array_find() `, :py:meth:`array_find() ` + * - `array_find_label `_ + - :py:meth:`ProcessBuilder.array_find_label() `, :py:meth:`array_find_label() ` + * - 
`array_interpolate_linear `_ + - :py:meth:`ProcessBuilder.array_interpolate_linear() `, :py:meth:`array_interpolate_linear() ` + * - `array_labels `_ + - :py:meth:`ProcessBuilder.array_labels() `, :py:meth:`array_labels() ` + * - `array_modify `_ + - :py:meth:`ProcessBuilder.array_modify() `, :py:meth:`array_modify() ` + * - `arsinh `_ + - :py:meth:`ProcessBuilder.arsinh() `, :py:meth:`arsinh() ` + * - `artanh `_ + - :py:meth:`ProcessBuilder.artanh() `, :py:meth:`artanh() ` + * - `atmospheric_correction `_ + - :py:meth:`ProcessBuilder.atmospheric_correction() `, :py:meth:`atmospheric_correction() `, :py:meth:`DataCube.atmospheric_correction() ` + * - `between `_ + - :py:meth:`ProcessBuilder.between() `, :py:meth:`between() ` + * - `ceil `_ + - :py:meth:`ProcessBuilder.ceil() `, :py:meth:`ceil() ` + * - `climatological_normal `_ + - :py:meth:`ProcessBuilder.climatological_normal() `, :py:meth:`climatological_normal() ` + * - `clip `_ + - :py:meth:`ProcessBuilder.clip() `, :py:meth:`clip() ` + * - `cloud_detection `_ + - :py:meth:`ProcessBuilder.cloud_detection() `, :py:meth:`cloud_detection() ` + * - `constant `_ + - :py:meth:`ProcessBuilder.constant() `, :py:meth:`constant() ` + * - `cos `_ + - :py:meth:`ProcessBuilder.cos() `, :py:meth:`cos() ` + * - `cosh `_ + - :py:meth:`ProcessBuilder.cosh() `, :py:meth:`cosh() ` + * - `count `_ + - :py:meth:`ProcessBuilder.count() `, :py:meth:`count() `, :py:meth:`DataCube.count_time() ` + * - `create_raster_cube `_ + - :py:meth:`ProcessBuilder.create_raster_cube() `, :py:meth:`create_raster_cube() ` + * - `cummax `_ + - :py:meth:`ProcessBuilder.cummax() `, :py:meth:`cummax() ` + * - `cummin `_ + - :py:meth:`ProcessBuilder.cummin() `, :py:meth:`cummin() ` + * - `cumproduct `_ + - :py:meth:`ProcessBuilder.cumproduct() `, :py:meth:`cumproduct() ` + * - `cumsum `_ + - :py:meth:`ProcessBuilder.cumsum() `, :py:meth:`cumsum() ` + * - `date_shift `_ + - :py:meth:`ProcessBuilder.date_shift() `, :py:meth:`date_shift() ` + * - 
`dimension_labels `_ + - :py:meth:`ProcessBuilder.dimension_labels() `, :py:meth:`dimension_labels() `, :py:meth:`DataCube.dimension_labels() ` + * - `divide `_ + - :py:meth:`ProcessBuilder.__truediv__() `, :py:meth:`ProcessBuilder.__rtruediv__() `, :py:meth:`ProcessBuilder.divide() `, :py:meth:`divide() `, :py:meth:`DataCube.divide() `, :py:meth:`DataCube.__truediv__() `, :py:meth:`DataCube.__rtruediv__() ` + * - `drop_dimension `_ + - :py:meth:`ProcessBuilder.drop_dimension() `, :py:meth:`drop_dimension() `, :py:meth:`DataCube.drop_dimension() ` + * - `e `_ + - :py:meth:`ProcessBuilder.e() `, :py:meth:`e() ` + * - `eq `_ + - :py:meth:`ProcessBuilder.__eq__() `, :py:meth:`ProcessBuilder.eq() `, :py:meth:`eq() `, :py:meth:`DataCube.__eq__() ` + * - `exp `_ + - :py:meth:`ProcessBuilder.exp() `, :py:meth:`exp() ` + * - `extrema `_ + - :py:meth:`ProcessBuilder.extrema() `, :py:meth:`extrema() ` + * - `filter_bands `_ + - :py:meth:`ProcessBuilder.filter_bands() `, :py:meth:`filter_bands() `, :py:meth:`DataCube.filter_bands() ` + * - `filter_bbox `_ + - :py:meth:`ProcessBuilder.filter_bbox() `, :py:meth:`filter_bbox() `, :py:meth:`DataCube.filter_bbox() ` + * - `filter_labels `_ + - :py:meth:`ProcessBuilder.filter_labels() `, :py:meth:`filter_labels() ` + * - `filter_spatial `_ + - :py:meth:`ProcessBuilder.filter_spatial() `, :py:meth:`filter_spatial() `, :py:meth:`DataCube.filter_spatial() ` + * - `filter_temporal `_ + - :py:meth:`ProcessBuilder.filter_temporal() `, :py:meth:`filter_temporal() `, :py:meth:`DataCube.filter_temporal() ` + * - `first `_ + - :py:meth:`ProcessBuilder.first() `, :py:meth:`first() ` + * - `fit_class_random_forest `_ + - :py:meth:`ProcessBuilder.fit_class_random_forest() `, :py:meth:`fit_class_random_forest() `, :py:meth:`VectorCube.fit_class_random_forest() ` + * - `fit_curve `_ + - :py:meth:`ProcessBuilder.fit_curve() `, :py:meth:`fit_curve() `, :py:meth:`DataCube.fit_curve() ` + * - `fit_regr_random_forest `_ + - 
:py:meth:`ProcessBuilder.fit_regr_random_forest() `, :py:meth:`fit_regr_random_forest() `, :py:meth:`VectorCube.fit_regr_random_forest() ` + * - `flatten_dimensions `_ + - :py:meth:`ProcessBuilder.flatten_dimensions() `, :py:meth:`flatten_dimensions() `, :py:meth:`DataCube.flatten_dimensions() ` + * - `floor `_ + - :py:meth:`ProcessBuilder.floor() `, :py:meth:`floor() ` + * - `ge `_ + - :py:meth:`ProcessBuilder.__ge__() `, :py:meth:`DataCube.__ge__() ` + * - `gt `_ + - :py:meth:`ProcessBuilder.__gt__() `, :py:meth:`ProcessBuilder.gt() `, :py:meth:`gt() `, :py:meth:`DataCube.__gt__() ` + * - `gte `_ + - :py:meth:`ProcessBuilder.gte() `, :py:meth:`gte() ` + * - `if_ `_ + - :py:meth:`ProcessBuilder.if_() `, :py:meth:`if_() ` + * - `inspect `_ + - :py:meth:`ProcessBuilder.inspect() `, :py:meth:`inspect() ` + * - `int `_ + - :py:meth:`ProcessBuilder.int() `, :py:meth:`int() ` + * - `is_infinite `_ + - :py:meth:`ProcessBuilder.is_infinite() `, :py:meth:`is_infinite() ` + * - `is_nan `_ + - :py:meth:`ProcessBuilder.is_nan() `, :py:meth:`is_nan() ` + * - `is_nodata `_ + - :py:meth:`ProcessBuilder.is_nodata() `, :py:meth:`is_nodata() ` + * - `is_valid `_ + - :py:meth:`ProcessBuilder.is_valid() `, :py:meth:`is_valid() ` + * - `last `_ + - :py:meth:`ProcessBuilder.last() `, :py:meth:`last() ` + * - `le `_ + - :py:meth:`DataCube.__le__() ` + * - `linear_scale_range `_ + - :py:meth:`ProcessBuilder.linear_scale_range() `, :py:meth:`linear_scale_range() `, :py:meth:`DataCube.linear_scale_range() ` + * - `ln `_ + - :py:meth:`ProcessBuilder.ln() `, :py:meth:`ln() `, :py:meth:`DataCube.ln() ` + * - `load_collection `_ + - :py:meth:`ProcessBuilder.load_collection() `, :py:meth:`load_collection() `, :py:meth:`DataCube.load_collection() `, :py:meth:`Connection.load_collection() ` + * - `load_geojson `_ + - :py:meth:`VectorCube.load_geojson() `, :py:meth:`Connection.load_geojson() ` + * - `load_ml_model `_ + - :py:meth:`ProcessBuilder.load_ml_model() `, :py:meth:`load_ml_model() `, 
:py:meth:`MlModel.load_ml_model() ` + * - `load_result `_ + - :py:meth:`ProcessBuilder.load_result() `, :py:meth:`load_result() `, :py:meth:`Connection.load_result() ` + * - `load_stac `_ + - :py:meth:`Connection.load_stac() ` + * - `load_uploaded_files `_ + - :py:meth:`ProcessBuilder.load_uploaded_files() `, :py:meth:`load_uploaded_files() ` + * - `log `_ + - :py:meth:`ProcessBuilder.log() `, :py:meth:`log() `, :py:meth:`DataCube.logarithm() `, :py:meth:`DataCube.log2() `, :py:meth:`DataCube.log10() ` + * - `lt `_ + - :py:meth:`ProcessBuilder.__lt__() `, :py:meth:`ProcessBuilder.lt() `, :py:meth:`lt() `, :py:meth:`DataCube.__lt__() ` + * - `lte `_ + - :py:meth:`ProcessBuilder.__le__() `, :py:meth:`ProcessBuilder.lte() `, :py:meth:`lte() ` + * - `mask `_ + - :py:meth:`ProcessBuilder.mask() `, :py:meth:`mask() `, :py:meth:`DataCube.mask() ` + * - `mask_polygon `_ + - :py:meth:`ProcessBuilder.mask_polygon() `, :py:meth:`mask_polygon() `, :py:meth:`DataCube.mask_polygon() ` + * - `max `_ + - :py:meth:`ProcessBuilder.max() `, :py:meth:`max() `, :py:meth:`DataCube.max_time() ` + * - `mean `_ + - :py:meth:`ProcessBuilder.mean() `, :py:meth:`mean() `, :py:meth:`DataCube.mean_time() ` + * - `median `_ + - :py:meth:`ProcessBuilder.median() `, :py:meth:`median() `, :py:meth:`DataCube.median_time() ` + * - `merge_cubes `_ + - :py:meth:`ProcessBuilder.merge_cubes() `, :py:meth:`merge_cubes() `, :py:meth:`DataCube.merge_cubes() ` + * - `min `_ + - :py:meth:`ProcessBuilder.min() `, :py:meth:`min() `, :py:meth:`DataCube.min_time() ` + * - `mod `_ + - :py:meth:`ProcessBuilder.mod() `, :py:meth:`mod() ` + * - `multiply `_ + - :py:meth:`ProcessBuilder.__mul__() `, :py:meth:`ProcessBuilder.__rmul__() `, :py:meth:`ProcessBuilder.__neg__() `, :py:meth:`ProcessBuilder.multiply() `, :py:meth:`multiply() `, :py:meth:`DataCube.multiply() `, :py:meth:`DataCube.__neg__() `, :py:meth:`DataCube.__mul__() `, :py:meth:`DataCube.__rmul__() ` + * - `nan `_ + - :py:meth:`ProcessBuilder.nan() `, 
:py:meth:`nan() ` + * - `ndvi `_ + - :py:meth:`ProcessBuilder.ndvi() `, :py:meth:`ndvi() `, :py:meth:`DataCube.ndvi() ` + * - `neq `_ + - :py:meth:`ProcessBuilder.__ne__() `, :py:meth:`ProcessBuilder.neq() `, :py:meth:`neq() `, :py:meth:`DataCube.__ne__() ` + * - `normalized_difference `_ + - :py:meth:`ProcessBuilder.normalized_difference() `, :py:meth:`normalized_difference() `, :py:meth:`DataCube.normalized_difference() ` + * - `not `_ + - :py:meth:`DataCube.__invert__() ` + * - `not_ `_ + - :py:meth:`ProcessBuilder.not_() `, :py:meth:`not_() ` + * - `or `_ + - :py:meth:`DataCube.logical_or() `, :py:meth:`DataCube.__or__() ` + * - `or_ `_ + - :py:meth:`ProcessBuilder.or_() `, :py:meth:`or_() ` + * - `order `_ + - :py:meth:`ProcessBuilder.order() `, :py:meth:`order() ` + * - `pi `_ + - :py:meth:`ProcessBuilder.pi() `, :py:meth:`pi() ` + * - `power `_ + - :py:meth:`ProcessBuilder.__pow__() `, :py:meth:`ProcessBuilder.power() `, :py:meth:`power() `, :py:meth:`DataCube.__rpow__() `, :py:meth:`DataCube.__pow__() `, :py:meth:`DataCube.power() ` + * - `predict_curve `_ + - :py:meth:`ProcessBuilder.predict_curve() `, :py:meth:`predict_curve() `, :py:meth:`DataCube.predict_curve() ` + * - `predict_random_forest `_ + - :py:meth:`ProcessBuilder.predict_random_forest() `, :py:meth:`predict_random_forest() `, :py:meth:`DataCube.predict_random_forest() ` + * - `product `_ + - :py:meth:`ProcessBuilder.product() `, :py:meth:`product() ` + * - `quantiles `_ + - :py:meth:`ProcessBuilder.quantiles() `, :py:meth:`quantiles() ` + * - `rearrange `_ + - :py:meth:`ProcessBuilder.rearrange() `, :py:meth:`rearrange() ` + * - `reduce_dimension `_ + - :py:meth:`ProcessBuilder.reduce_dimension() `, :py:meth:`reduce_dimension() `, :py:meth:`DataCube.reduce_dimension() ` + * - `reduce_spatial `_ + - :py:meth:`ProcessBuilder.reduce_spatial() `, :py:meth:`reduce_spatial() ` + * - `rename_dimension `_ + - :py:meth:`ProcessBuilder.rename_dimension() `, :py:meth:`rename_dimension() `, 
:py:meth:`DataCube.rename_dimension() ` + * - `rename_labels `_ + - :py:meth:`ProcessBuilder.rename_labels() `, :py:meth:`rename_labels() `, :py:meth:`DataCube.rename_labels() ` + * - `resample_cube_spatial `_ + - :py:meth:`ProcessBuilder.resample_cube_spatial() `, :py:meth:`resample_cube_spatial() ` + * - `resample_cube_temporal `_ + - :py:meth:`ProcessBuilder.resample_cube_temporal() `, :py:meth:`resample_cube_temporal() `, :py:meth:`DataCube.resample_cube_temporal() ` + * - `resample_spatial `_ + - :py:meth:`ProcessBuilder.resample_spatial() `, :py:meth:`resample_spatial() `, :py:meth:`DataCube.resample_spatial() ` + * - `resolution_merge `_ + - :py:meth:`DataCube.resolution_merge() ` + * - `round `_ + - :py:meth:`ProcessBuilder.round() `, :py:meth:`round() ` + * - `run_udf `_ + - :py:meth:`ProcessBuilder.run_udf() `, :py:meth:`run_udf() `, :py:meth:`VectorCube.run_udf() ` + * - `run_udf_externally `_ + - :py:meth:`ProcessBuilder.run_udf_externally() `, :py:meth:`run_udf_externally() ` + * - `sar_backscatter `_ + - :py:meth:`ProcessBuilder.sar_backscatter() `, :py:meth:`sar_backscatter() `, :py:meth:`DataCube.sar_backscatter() ` + * - `save_ml_model `_ + - :py:meth:`ProcessBuilder.save_ml_model() `, :py:meth:`save_ml_model() ` + * - `save_result `_ + - :py:meth:`ProcessBuilder.save_result() `, :py:meth:`save_result() `, :py:meth:`VectorCube.save_result() `, :py:meth:`DataCube.save_result() ` + * - `sd `_ + - :py:meth:`ProcessBuilder.sd() `, :py:meth:`sd() ` + * - `sgn `_ + - :py:meth:`ProcessBuilder.sgn() `, :py:meth:`sgn() ` + * - `sin `_ + - :py:meth:`ProcessBuilder.sin() `, :py:meth:`sin() ` + * - `sinh `_ + - :py:meth:`ProcessBuilder.sinh() `, :py:meth:`sinh() ` + * - `sort `_ + - :py:meth:`ProcessBuilder.sort() `, :py:meth:`sort() ` + * - `sqrt `_ + - :py:meth:`ProcessBuilder.sqrt() `, :py:meth:`sqrt() ` + * - `subtract `_ + - :py:meth:`ProcessBuilder.__sub__() `, :py:meth:`ProcessBuilder.__rsub__() `, :py:meth:`ProcessBuilder.subtract() `, 
:py:meth:`subtract() `, :py:meth:`DataCube.subtract() `, :py:meth:`DataCube.__sub__() `, :py:meth:`DataCube.__rsub__() ` + * - `sum `_ + - :py:meth:`ProcessBuilder.sum() `, :py:meth:`sum() ` + * - `tan `_ + - :py:meth:`ProcessBuilder.tan() `, :py:meth:`tan() ` + * - `tanh `_ + - :py:meth:`ProcessBuilder.tanh() `, :py:meth:`tanh() ` + * - `text_begins `_ + - :py:meth:`ProcessBuilder.text_begins() `, :py:meth:`text_begins() ` + * - `text_concat `_ + - :py:meth:`ProcessBuilder.text_concat() `, :py:meth:`text_concat() ` + * - `text_contains `_ + - :py:meth:`ProcessBuilder.text_contains() `, :py:meth:`text_contains() ` + * - `text_ends `_ + - :py:meth:`ProcessBuilder.text_ends() `, :py:meth:`text_ends() ` + * - `trim_cube `_ + - :py:meth:`ProcessBuilder.trim_cube() `, :py:meth:`trim_cube() ` + * - `unflatten_dimension `_ + - :py:meth:`ProcessBuilder.unflatten_dimension() `, :py:meth:`unflatten_dimension() `, :py:meth:`DataCube.unflatten_dimension() ` + * - `variance `_ + - :py:meth:`ProcessBuilder.variance() `, :py:meth:`variance() ` + * - `vector_buffer `_ + - :py:meth:`ProcessBuilder.vector_buffer() `, :py:meth:`vector_buffer() ` + * - `vector_to_random_points `_ + - :py:meth:`ProcessBuilder.vector_to_random_points() `, :py:meth:`vector_to_random_points() ` + * - `vector_to_regular_points `_ + - :py:meth:`ProcessBuilder.vector_to_regular_points() `, :py:meth:`vector_to_regular_points() ` + * - `xor `_ + - :py:meth:`ProcessBuilder.xor() `, :py:meth:`xor() ` + +:subscript:`(Table autogenerated on 2023-08-07)` diff --git a/_sources/processes.rst.txt b/_sources/processes.rst.txt new file mode 100644 index 000000000..b81db1c53 --- /dev/null +++ b/_sources/processes.rst.txt @@ -0,0 +1,465 @@ +*********************** +Working with processes +*********************** + +In openEO, a **process** is an operation that performs a specific task on +a set of parameters and returns a result. 
+For example, with the ``add`` process you can add two numbers, in openEO's JSON notation:: + + { + "process_id": "add", + "arguments": {"x": 3, "y": 5} + } + + +A process is similar to a *function* in common programming languages, +and likewise, multiple processes can be combined or chained together +into new, more complex operations. + +A bit of terminology +==================== + +A **pre-defined process** is a process provided out of the box by a given *back-end*. +These are often the `centrally defined openEO processes `_, +such as common mathematical (``sum``, ``divide``, ``sqrt``, ...), +statistical (``mean``, ``max``, ...) and +image processing (``mask``, ``apply_kernel``, ...) +operations. +Back-ends are expected to support most of these standard ones, +but are free to pre-define additional ones too. + + +Processes can be combined into a larger pipeline, parameterized +and stored on the back-end as a so-called **user-defined process**. +This allows you to build a library of reusable building blocks +that can be inserted easily in multiple other places. +See :ref:`user-defined-processes` for more information. + + +How processes are combined into a larger unit +is internally represented by a so-called **process graph**. +It describes how the inputs and outputs of processes +should be linked together. +A user of the Python client should normally not worry about +the details of a process graph structure, as most of these aspects +are hidden behind regular Python functions, classes and methods. + + + +Using common pre-defined processes +=================================== + +The listing of pre-defined processes provided by a back-end +can be inspected with :func:`~openeo.rest.connection.Connection.list_processes`. 
+For example, to get a list of the process names (process ids):: + + >>> process_ids = [process["id"] for process in connection.list_processes()] + >>> print(process_ids[:16]) + ['arccos', 'arcosh', 'power', 'last', 'subtract', 'not', 'cosh', 'artanh', + 'is_valid', 'first', 'median', 'eq', 'absolute', 'arctan2', 'divide','is_nan'] + +More information about the processes, like a description +or expected parameters, can be queried like that, +but it is often easier to look them up on the +`official openEO process documentation `_. + +A single pre-defined process can be retrieved with +:func:`~openeo.rest.connection.Connection.describe_process`. + +Convenience methods +-------------------- + +Most of the important pre-defined processes are covered directly by methods +on classes like :class:`~openeo.rest.datacube.DataCube` or +:class:`~openeo.rest.vectorcube.VectorCube`. + +.. seealso:: + See :ref:`openeo_process_mapping` for a mapping of openEO processes + to the corresponding methods in the openEO Python Client library. + +For example, to apply the ``filter_temporal`` process to a raster data cube:: + + cube = cube.filter_temporal("2020-02-20", "2020-06-06") + +Being regular Python methods, you get usual function call features +you're accustomed to: default values, keyword arguments, ``kwargs`` usage, ... +For example, to use a bounding box dictionary with ``kwargs``-expansion:: + + bbox = { + "west": 5.05, "south": 51.20, "east": 5.10, "north": 51.23 + } + cube = cube.filter_bbox(**bbox) + +Note that some methods try to be more flexible and convenient to use +than how the official process definition prescribes. 
+For example, the ``filter_temporal`` process expects an ``extent`` array +with 2 items (the start and end date), +but you can call the corresponding client method in multiple equivalent ways:: + + cube.filter_temporal("2019-07-01", "2019-08-01") + cube.filter_temporal(["2019-07-01", "2019-08-01"]) + cube.filter_temporal(extent=["2019-07-01", "2019-08-01"]) + cube.filter_temporal(start_date="2019-07-01", end_date="2019-08-01") + + +Advanced argument tweaking +--------------------------- + +.. versionadded:: 0.10.1 + +In some situations, you may want to finetune what the (convenience) methods generate. +For example, you want to play with non-standard, experimental arguments, +or there is a problem with an automatic argument handling/conversion feature. + +You can tweak the arguments of your current result node as follows. +Say, we want to add some non-standard ``feature_flags`` argument to the ``load_collection`` process node. +We first get the current result node with :py:meth:`~openeo.rest.datacube.DataCube.result_node` and use :py:meth:`~openeo.internal.graph_building.PGNode.update_arguments` to add an additional argument to it:: + + # `Connection.load_collection` does not support `feature_flags` argument + cube = connection.load_collection(...) + + # Add `feature_flags` argument to `load_collection` process graph node + cube.result_node().update_arguments(feature_flags="rXPk") + + # The resulting process graph will now contain this non-standard argument: + # { + # "process_id": "load_collection", + # "arguments": { + # ... + # "feature_flags": "rXPk", + + +Generic API for adding processes +================================= + +An openEO back-end may offer processes that are not part of the core API, +or the client may not (yet) have a corresponding method +for a process that you wish to use. +In that case, you can fall back to a more generic API +that allows you to add processes directly. 
+ +Basics +------ + +To add a simple process to the graph, use +the :func:`~openeo.rest.datacube.DataCube.process` method +on a :class:`~openeo.rest.datacube.DataCube`. +You have to specify the process id and arguments +(as a single dictionary or through keyword arguments ``**kwargs``). +It will return a new DataCube with the new process appended +to the internal process graph. + +.. # TODO this example makes no sense: it uses cube for what? + +A very simple example using the ``mean`` process and a +literal list in an arguments dictionary:: + + arguments= { + "data": [1, 3, -1] + } + res = cube.process("mean", arguments) + +or equivalently, leveraging keyword arguments:: + + res = cube.process("mean", data=[1, 3, -1]) + + +Passing data cube arguments +---------------------------- + +The example above is a bit convoluted however in the sense that +you start from a given data cube ``cube``, you add a ``mean`` process +that works on a given data array, while completely ignoring the original cube. +In reality you typically want to apply the process on the cube. +This is possible by passing a data cube object directly as argument, +for example with the ``ndvi`` process that at least expects +a data cube as ``data`` argument :: + + res = cube.process("ndvi", data=cube) + + +Note that you have to specify ``cube`` twice here: +a first time to call the method and a second time as argument. +Moreover, it requires you to define a Python variable for the data +cube, which is annoying if you want to use chained expressions. 
+To solve these issues, you can use the :const:`~openeo.rest.datacube.THIS` +constant as symbolic reference to the "current" cube:: + + from openeo.rest.datacube import THIS + + res = ( + cube + .process("filter_bands", data=THIS) + .process("mask", data=THIS, mask=mask) + .process("ndvi", data=THIS) + ) + + +Passing results from other process calls as arguments +------------------------------------------------------ + +Another use case of generically applying (custom) processes is +passing a process result as argument to another process working on a cube. +For example, assume we have a custom process ``load_my_vector_cube`` +to load a vector cube from an online resource. +We can use this vector cube as geometry for +:py:meth:`DataCube.aggregate_spatial() ` +using :py:func:`openeo.processes.process()` as follows: + + +.. code-block:: python + + from openeo.processes import process + + res = cube.aggregate_spatial( + geometries=process("load_my_vector_cube", url="https://geo.example/features.db"), + reducer="mean" + ) + + +.. _callbackfunctions: + +Processes with child "callbacks" +================================ + +Some openEO processes expect some kind of sub-process +to be invoked on a subset or slice of the datacube. +For example: + +* process ``apply`` requires a transformation that will be applied + to each pixel in the cube (separately), e.g. in pseudocode + + .. code-block:: text + + cube.apply( + given a pixel value + => scale it with factor 0.01 + ) + +* process ``reduce_dimension`` requires an aggregation function to convert + an array of pixel values (along a given dimension) to a single value, + e.g. in pseudocode + + .. 
code-block:: text + + cube.reduce_dimension( + given a pixel timeseries (array) for a (x,y)-location + => temporal mean of that array + ) + +* process ``aggregate_spatial`` requires a function to aggregate the values + in one or more geometries + +These transformation functions are usually called "**callbacks**" +because instead of being called explicitly by the user, +they are called and managed by their "parent" process +(the ``apply``, ``reduce_dimension`` and ``aggregate_spatial`` in the examples) + + +The openEO Python Client Library currently provides a couple of DataCube methods +that expect such a callback, most commonly: + +- :py:meth:`openeo.rest.datacube.DataCube.aggregate_spatial` +- :py:meth:`openeo.rest.datacube.DataCube.aggregate_temporal` +- :py:meth:`openeo.rest.datacube.DataCube.apply` +- :py:meth:`openeo.rest.datacube.DataCube.apply_dimension` +- :py:meth:`openeo.rest.datacube.DataCube.apply_neighborhood` +- :py:meth:`openeo.rest.datacube.DataCube.reduce_dimension` + +The openEO Python Client Library supports several ways +to specify the desired callback for these functions: + + +.. contents:: + :depth: 1 + :local: + :backlinks: top + +Callback as string +------------------ + +The easiest way is passing a process name as a string, +for example: + +.. code-block:: python + + # Take the absolute value of each pixel + cube.apply("absolute") + + # Reduce a cube along the temporal dimension by taking the maximum value + cube.reduce_dimension(reducer="max", dimension="t") + +This approach is only possible if the desired transformation is available +as a single process. If not, use one of the methods below. + +It's also important to note that the "signature" of the provided callback process +should correspond properly with what the parent process expects. 
+For example: ``apply`` requires a callback process that receives a +number and returns one (like ``absolute`` or ``sqrt``), +while ``reduce_dimension`` requires a callback process that receives +an array of numbers and returns a single number (like ``max`` or ``mean``). + + +.. _child_callback_callable: + +Callback as a callable +----------------------- + +You can also specify the callback as a "callable": +which is a fancy word for a Python object that can be called, +but just think of it like a function you can call. + +You can use a regular Python function, like this: + +.. code-block:: python + + def transform(x): + return x * 2 + 3 + + cube.apply(transform) + +or, more compactly, a "lambda" +(a construct in Python to create anonymous inline functions): + +.. code-block:: python + + cube.apply(lambda x: x * 2 + 3) + + +The openEO Python Client Library implements most of the official openEO processes as +:ref:`functions in the "openeo.processes" module `, +which can be used directly as callback: + +.. code-block:: python + + from openeo.processes import absolute, max + + cube.apply(absolute) + cube.reduce_dimension(reducer=max, dimension="t") + + +The argument that will be passed to all these callback functions is +a :py:class:`ProcessBuilder ` instance. +This is a helper object with predefined methods for all standard openEO processes, +allowing to use an object oriented coding style to define the callback. +For example: + +.. code-block:: python + + from openeo.processes import ProcessBuilder + + def avg(data: ProcessBuilder): + return data.mean() + + cube.reduce_dimension(reducer=avg, dimension="t") + + +These methods also return :py:class:`ProcessBuilder ` objects, +which also allows writing callbacks in chained fashion: + +.. code-block:: python + + cube.apply( + lambda x: x.absolute().cos().add(y=1.23) + ) + + +All this gives a lot of flexibility to define callbacks compactly +in a desired coding style. 
+The following examples result in the same callback: + +.. code-block:: python + + from openeo.processes import ProcessBuilder, mean, cos, add + + # Chained methods + cube.reduce_dimension( + lambda data: data.mean().cos().add(y=1.23), + dimension="t" + ) + + # Functions + cube.reduce_dimension( + lambda data: add(x=cos(mean(data)), y=1.23), + dimension="t" + ) + + # Mixing methods, functions and operators + cube.reduce_dimension( + lambda data: cos(data.mean()) + 1.23, + dimension="t" + ) + + +Caveats +```````` + +Specifying callbacks through Python functions (or lambdas) +looks intuitive and straightforward, but it should be noted +that not everything is allowed in these functions. +You should just limit yourself to calling +:py:mod:`openeo.processes` functions, +:py:class:`ProcessBuilder ` methods +and basic math operators. +Don't call functions from other libraries like numpy or scipy. +Don't use Python control flow statements like ``if/else`` constructs +or ``for`` loops. + +The reason for this is that the openEO Python Client Library +does not translate the function source code itself +to an openEO process graph. +Instead, when building the openEO process graph, +it passes a special object to the function +and keeps track of which :py:mod:`openeo.processes` functions +were called to assemble the corresponding process graph. +If you use control flow statements or use numpy functions for example, +this procedure will incorrectly detect what you want to do in the callback. + +For example, if you mistakenly use the Python builtin :py:func:`sum` function +in a callback instead of :py:func:`openeo.processes.sum`, you will run into trouble. +Luckily the openEO Python client Library should raise an error if it detects that:: + + >>> # Wrongly using builtin `sum` function + >>> cube.reduce_dimension(dimension="t", reducer=sum) + RuntimeError: Exceeded ProcessBuilder iteration limit. 
+ Are you mistakenly using a builtin like `sum()` or `all()` in a callback + instead of the appropriate helpers from `openeo.processes`? + + >>> # Explicit usage of `openeo.processes.sum` + >>> import openeo.processes + >>> cube.reduce_dimension(dimension="t", reducer=openeo.processes.sum) + + + + +Callback as ``PGNode`` +----------------------- + +You can also pass a :py:class:`~openeo.internal.graph_building.PGNode` object as callback. + +.. attention:: + This approach should generally not be used in normal use cases. + The other options discussed above should be preferred. + It's mainly intended for internal use and an occasional, advanced use case. + It requires in-depth knowledge of the openEO API + and openEO Python Client Library to construct correctly. + +Some examples: + +.. code-block:: python + + from openeo.internal.graph_building import PGNode + + cube.apply(PGNode( + "add", + x=PGNode( + "cos", + x=PGNode("absolute", x={"from_parameter": "x"}) + ), + y=1.23 + )) + + cube.reduce_dimension( + reducer=PGNode("max", data={"from_parameter": "data"}), + dimension="bands" + ) diff --git a/_sources/udf.rst.txt b/_sources/udf.rst.txt new file mode 100644 index 000000000..5f7983764 --- /dev/null +++ b/_sources/udf.rst.txt @@ -0,0 +1,701 @@ +.. index:: User-defined functions +.. index:: UDF + +.. _user-defined-functions: + +###################################### +User-Defined Functions (UDF) explained +###################################### + + +While openEO supports a wide range of pre-defined processes +and allows to build more complex user-defined processes from them, +you sometimes need operations or algorithms that are +not (yet) available or standardized as openEO process. +**User-Defined Functions (UDF)** is an openEO feature +(through the `run_udf `_ process) +that aims to fill that gap by allowing a user to express (a part of) +an **algorithm as a Python/R/... script to be run back-end side**. 
+ +There are a lot of details to cover, +but here is a rudimentary example snippet +to give you a quick impression of how to work with UDFs +using the openEO Python Client library: + +.. code-block:: python + :caption: Basic UDF usage example snippet to rescale pixel values + + import openeo + + # Build a UDF object from an inline string with Python source code. + udf = openeo.UDF(""" + import xarray + + def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray: + cube.values = 0.0001 * cube.values + return cube + """) + + # Or load the UDF code from a separate file. + # udf = openeo.UDF.from_file("udf-code.py") + + # Apply the UDF to a cube. + rescaled_cube = cube.apply(process=udf) + + +Ideally, it allows you to embed existing Python/R/... implementations +in an openEO workflow (with some necessary "glue code"). +However, it is recommended to try to do as much pre- or postprocessing +with pre-defined processes +before blindly copy-pasting source code snippets as UDFs. +Pre-defined processes are typically well-optimized by the backend, +while UDFs can come with a performance penalty +and higher development/debug/maintenance costs. + + +.. warning:: + + Do not confuse **user-defined functions** (abbreviated as UDF) with + **user-defined processes** (sometimes abbreviated as UDP) in openEO, + which is a way to define and use your own process graphs + as reusable building blocks. + See :ref:`user-defined-processes` for more information. + + + +Applicability and Constraints +============================== + +.. index:: chunking + +openEO is designed to work transparently on large data sets +and your UDF has to follow a couple of guidelines to make that possible. +First of all, as data cubes play a central role in openEO, +your UDF should accept and return correct **data cube structures**, +with proper dimensions, dimension labels, etc. 
+Moreover, the back-end will typically divide your input data cube +in smaller chunks and process these chunks separately (e.g. on isolated workers). +Consequently, it's important that your **UDF algorithm operates correctly +in such a chunked processing context**. + +A very common mistake is to use index-based array indexing, rather than name-based. The index-based approach +assumes that datacube dimension order is fixed, which is not guaranteed. Next to that, it also reduces the readability +of your code. Label-based indexing is a great feature of xarray, and should be used whenever possible. + +As a rule of thumb, the UDF should preserve the dimensions and shape of the input +data cube. The datacube chunk that is passed on by the backend does not have a fixed +specification, so the UDF needs to be able to accommodate different shapes and sizes of the data. + +There are important exceptions to this rule that depend on the context in which the UDF is used. +For instance, a UDF used as a reducer should effectively remove the reduced dimension from the +output chunk. These details are documented in the next sections. + +UDFs as apply/reduce "callbacks" +--------------------------------- + +UDFs are typically used as "callback" processes for "meta" processes +like ``apply`` or ``reduce_dimension`` (also see :ref:`callbackfunctions`). +These meta-processes make abstraction of a datacube as a whole +and allow the callback to focus on a small slice of data or a single dimension. +Their nature instructs the backend how the data should be processed +and can be chunked: + +`apply `_ + Applies a process on *each pixel separately*. + The back-end has all freedom to choose chunking + (e.g. chunk spatially and temporally). + Dimensions and their labels are fully preserved. + This function has limited practical use in combination with UDF's. + +`apply_dimension `_ + Applies a process to all pixels *along a given dimension* + to produce a new series of values for that dimension. 
+ The back-end will not split your data on that dimension. + For example, when working along the time dimension, + your UDF is guaranteed to receive a full timeseries, + but the data could be chunked spatially. + All dimensions and labels are preserved, + except for the dimension along which ``apply_dimension`` is applied: + the number of dimension labels is allowed to change. + +`reduce_dimension `_ + Applies a process to all pixels *along a given dimension* + to produce a single value, eliminating that dimension. + Like with ``apply_dimension``, the back-end will + not split your data on that dimension. + The dimension along which ``reduce_dimension`` is applied must be removed + from the output. + For example, when applying ``reduce_dimension`` on a spatiotemporal cube + along the time dimension, + the UDF is guaranteed to receive full timeseries + (but the data could be chunked spatially) + and the output cube should only be a spatial cube, without a temporal dimension. + +`apply_neighborhood `_ + Applies a process to a neighborhood of pixels + in a sliding-window fashion with (optional) overlap. + Data chunking in this case is explicitly controlled by the user. + Dimensions and number of labels are fully preserved. This is the most versatile + and widely used function to work with UDF's. + + + +UDF function names and signatures +================================== + +The UDF code you pass to the back-end is basically a Python script +that contains one or more functions. +Exactly one of these functions should have a proper UDF signature, +as defined in the :py:mod:`openeo.udf.udf_signatures` module, +so that the back-end knows what the *entrypoint* function is +of your UDF implementation. + + +Module ``openeo.udf.udf_signatures`` +------------------------------------- + + +.. automodule:: openeo.udf.udf_signatures + :members: + + + +.. 
_udf_example_apply: + +A first example: ``apply`` with a UDF to rescale pixel values +================================================================ + +In most of the examples here, we will start from an initial Sentinel2 data cube like this: + +.. code-block:: python + + s2_cube = connection.load_collection( + "SENTINEL2_L2A", + spatial_extent={"west": 4.00, "south": 51.04, "east": 4.10, "north": 51.1}, + temporal_extent=["2022-03-01", "2022-03-31"], + bands=["B02", "B03", "B04"] + ) + + +The raw values in this initial ``s2_cube`` data cube are **digital numbers** +(integer values ranging from 0 to several thousands) +and to get **physical reflectance** values (float values, typically in the range between 0 and 0.5), +we have to rescale them. +This is a simple local transformation, without any interaction between pixels, +which is the modus operandi of the ``apply`` processes. + +.. note:: + + In practice it will be a lot easier and more efficient to do this kind of rescaling + with pre-defined openEO math processes, for example: ``s2_cube.apply(lambda x: 0.0001 * x)``. + This is just a very simple illustration to get started with UDFs. In fact, it's very likely that + you will never want to use a UDF with apply. + +UDF script +---------- + +The UDF code is this short script (the part that does the actual value rescaling is highlighted): + +.. code-block:: python + :linenos: + :caption: ``udf-code.py`` + :emphasize-lines: 4 + + import xarray + + def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray: + cube.values = 0.0001 * cube.values + return cube + +Some details about this UDF script: + +- line 1: We import `xarray` as we use this as exchange format. +- line 3: We define a function named ``apply_datacube``, + which receives and returns a :py:class:`~xarray.DataArray` instance. + We follow here the :py:meth:`~openeo.udf.udf_signatures.apply_datacube()` UDF function signature. 
+- line 4: Because our scaling operation is so simple, we can transform the ``xarray.DataArray`` values in-place. +- line 5: Consequently, because the values were updated in-place, we can return the same Xarray object. + +Workflow script +---------------- + +In this first example, we'll cite a full, standalone openEO workflow script, +including creating the back-end connection, loading the initial data cube and downloading the result. +The UDF-specific part is highlighted. + +.. warning:: + This implementation depends on :py:class:`openeo.UDF ` improvements + that were introduced in version 0.13.0 of the openeo Python Client Library. + If you are currently stuck with working with an older version, + check :ref:`old_udf_api` for more information on the difference with the old API. + +.. code-block:: python + :linenos: + :caption: UDF usage example snippet + :emphasize-lines: 14-24 + + import openeo + + # Create connection to openEO back-end + connection = openeo.connect("...").authenticate_oidc() + + # Load initial data cube. + s2_cube = connection.load_collection( + "SENTINEL2_L2A", + spatial_extent={"west": 4.00, "south": 51.04, "east": 4.10, "north": 51.1}, + temporal_extent=["2022-03-01", "2022-03-31"], + bands=["B02", "B03", "B04"] + ) + + # Create a UDF object from inline source code. + udf = openeo.UDF(""" + import xarray + + def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray: + cube.values = 0.0001 * cube.values + return cube + """) + + # Pass UDF object as child process to `apply`. + rescaled = s2_cube.apply(process=udf) + + rescaled.download("apply-udf-scaling.nc") + +In line 15, we build an :py:class:`openeo.UDF ` object +from an inline string with the UDF source code. +This :py:class:`openeo.UDF ` object encapsulates various aspects +that are necessary to create a ``run_udf`` node in the process graph, +and we can pass it directly in line 24 as the ``process`` argument +to :py:meth:`DataCube.apply() `. + +.. 
tip:: + + Instead of putting your UDF code in an inline string like in the example, + it's often a good idea to **load the UDF code from a separate file**, + which is easier to maintain in your preferred editor or IDE. + You can do that directly with the + :py:meth:`openeo.UDF.from_file ` method: + + .. code-block:: python + + udf = openeo.UDF.from_file("udf-code.py") + +After downloading the result, we can inspect the band values locally. +Note that they fall mainly in a range from 0 to 1 (in most cases even below 0.2), +instead of the original digital number range (thousands): + +.. image:: _static/images/udf/apply-rescaled-histogram.png + + +UDF's that transform cube metadata +================================== +This is a new/experimental feature so may still be subject to change. + +In some cases, a UDF can have impact on the metadata of a cube, but this can not always +be easily inferred by process graph evaluation logic without running the actual +(expensive) UDF code. This limits the possibilities to validate process graphs, +or for instance make an estimate of the size of a datacube after applying a UDF. + +To provide evaluation logic with this information, the user should implement the +:py:meth:`~openeo.udf.udf_signatures.apply_metadata()` function as part of the UDF. +Please refer to the documentation of that function for more information. + +.. literalinclude:: ../examples/udf/udf_modify_spatial.py + :language: python + :caption: Example of a UDF that adjusts spatial metadata ``udf_modify_spatial.py`` + :name: spatial_udf + +To invoke a UDF like this, the apply_neighborhood method is most suitable: + +.. 
code-block:: python + + udf_code = Path('udf_modify_spatial.py').read_text() + cube_updated = cube.apply_neighborhood( + lambda data: data.run_udf(udf=udf_code, runtime='Python-Jep', context=dict()), + size=[ + {'dimension': 'x', 'value': 128, 'unit': 'px'}, + {'dimension': 'y', 'value': 128, 'unit': 'px'} + ], overlap=[]) + + + +Example: ``apply_dimension`` with a UDF +======================================== + +This is useful when running custom code over all band values for a given pixel or all observations per pixel. +See section below 'Smoothing timeseries with a user defined function' for a concrete example. + +Example: ``reduce_dimension`` with a UDF +======================================== + +The key element for a UDF invoked in the context of ``reduce_dimension`` is that it should actually return +an Xarray DataArray *without* the dimension that is specified to be reduced. + +So a reduce over time would receive a DataArray with ``bands,t,y,x`` dimensions, and return one with only ``bands,y,x``. + + +Example: ``apply_neighborhood`` with a UDF +=========================================== + +The apply_neighborhood process is generally used when working with complex AI models that require a +spatiotemporal input stack with a fixed size. It supports the ability to specify overlap, to ensure that the model +has sufficient border information to generate a spatially coherent output across chunks of the raster data cube. + +In the example below, the UDF will receive chunks of 128x128 pixels: 112 is the chunk size, while 2 times 8 pixels of +overlap on each side of the chunk results in 128. + +The time and band dimensions are not specified, which means that all values along these dimensions are passed into +the datacube. + + +.. 
code-block:: python + + output_cube = inputs_cube.apply_neighborhood(my_udf, size=[ + {'dimension': 'x', 'value': 112, 'unit': 'px'}, + {'dimension': 'y', 'value': 112, 'unit': 'px'} + ], overlap=[ + {'dimension': 'x', 'value': 8, 'unit': 'px'}, + {'dimension': 'y', 'value': 8, 'unit': 'px'} + ]) + + + +.. warning:: + + The ``apply_neighborhood`` is the most versatile, but also most complex process. Make sure to keep an eye on the dimensions + and the shape of the DataArray returned by your UDF. For instance, a very common error is to somehow 'flip' the spatial dimensions. + Debugging the UDF locally can help, but then you will want to try and reproduce the input that you get also on the backend. + This can typically be achieved by using logging to inspect the DataArrays passed into your UDF backend side. + + + +Example: Smoothing timeseries with a user defined function (UDF) +================================================================== + +In this example, we start from the ``evi_cube`` that was created in the previous example, and want to +apply a temporal smoothing on it. More specifically, we want to use the "Savitzky-Golay" smoother +that is available in the SciPy Python library. + + +To ensure that openEO understands your function, it needs to follow some rules, the UDF specification. +This is an example that follows those rules: + +.. literalinclude:: ../examples/udf/smooth_savitzky_golay.py + :language: python + :caption: Example UDF code ``smooth_savitzky_golay.py`` + :name: savgol_udf + +The method signature of the UDF is very important, because the back-end will use it to detect +the type of UDF. +This particular example accepts a :py:class:`~openeo.rest.datacube.DataCube` object as input and also returns a :py:class:`~openeo.rest.datacube.DataCube` object. +The type annotations and method name are actually used to detect how to invoke the UDF, so make sure they remain unchanged. 
+ + +Once the UDF is defined in a separate file, we load it +and apply it along a dimension: + +.. code-block:: python + + smoothing_udf = openeo.UDF.from_file('smooth_savitzky_golay.py') + smoothed_evi = evi_cube_masked.apply_dimension(smoothing_udf, dimension="t") + + +Downloading a datacube and executing a UDF locally +============================================================= + +Sometimes it is advantageous to run a UDF on the client machine (for example when developing/testing that UDF). +This is possible by using the convenience function :py:func:`openeo.udf.run_code.execute_local_udf`. +The steps to run a UDF (like the code from ``smooth_savitzky_golay.py`` above) are as follows: + +* Run the processes (or process graph) preceding the UDF and download the result in 'NetCDF' or 'JSON' format. +* Run :py:func:`openeo.udf.run_code.execute_local_udf` on the data file. + +For example:: + + from pathlib import Path + from openeo.udf import execute_local_udf + + my_process = connection.load_collection(... + + my_process.download('test_input.nc', format='NetCDF') + + smoothing_udf = Path('smooth_savitzky_golay.py').read_text() + execute_local_udf(smoothing_udf, 'test_input.nc', fmt='netcdf') + +Note: this algorithm's primary purpose is to aid client side development of UDFs using small datasets. It is not designed for large jobs. + +UDF dependency management +========================= + +UDFs usually have some dependencies on existing libraries, e.g. to implement complex algorithms. +In case of Python UDFs, it can be assumed that common libraries like numpy and Xarray are readily available, +not least because they underpin the Python UDF function signatures. +More concretely, it is possible to inspect available libraries for the available UDF runtimes +through :py:meth:`Connection.list_udf_runtimes()`. +For example, to list the available libraries for runtime "Python" (version "3"): + +.. 
code-block:: pycon + + >>> connection.list_udf_runtimes()["Python"]["versions"]["3"]["libraries"] + {'geopandas': {'version': '0.13.2'}, + 'numpy': {'version': '1.22.4'}, + 'xarray': {'version': '0.16.2'}, + ... + +Managing and using additional dependencies or libraries that are not provided out-of-the-box by a backend +is a more challenging problem and the practical details can vary between backends. + + +.. _python-udf-dependency-declaration: + +Standard for declaring Python UDF dependencies +----------------------------------------------- + +.. warning:: + + This is based on a fairly recent standard and it might not be supported by your chosen backend yet. + + +`PEP 723 "Inline script metadata" `_ defines a standard +for *Python scripts* to declare dependencies inside a top-level comment block. +If the openEO backend of your choice supports this standard, it is the preferred approach +to declare the (``import``) dependencies of your Python UDF: + +- It avoids all the overhead for the UDF developer + to correctly and efficiently make desired dependencies available in the UDF. +- It allows the openEO backend to optimize dependencies handling. + +.. warning:: + + An openEO backend might only support this automatic UDF dependency handling feature + in batch jobs (because of their isolated nature), + but not for synchronous processing requests. + + +Declaration of UDF dependencies +``````````````````````````````` + +A basic example of how the UDF dependencies can be declared in top-level comment block of your Python UDF: + +.. code-block:: python + :emphasize-lines: 1-6 + + # /// script + # dependencies = [ + # "geojson", + # "fancy-eo-library", + # ] + # /// + # + # This openEO UDF script implements ... + # based on the fancy-eo-library ... using geojson data ... + + import geojson + import fancyeo + import xarray + + def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray: + ... 
+ +Some considerations to make sure you have a valid metadata block: + +- Lines start with a single hash ``#`` and one space (the space can be omitted if the ``#`` is the only character on the line). +- The metadata block starts with a line ``# /// script`` and ends with ``# ///``. +- Between these delimiters you put the metadata fields in `TOML format `_, + each line prefixed with ``#`` and a space. +- Declare your UDF's dependencies in a ``dependencies`` field as a TOML array. + List each package on a separate line as shown above, or put them all on a single line. + It is also allowed to include comments, as long as the whole construct is valid TOML. +- Each ``dependencies`` entry must be a valid `PEP 508 `_ dependency specifier. + This practically means to use the package names (optionally with version constraints) + as expected by the ``pip install`` command. + +A more complex example to illustrate some more advanced aspects of the metadata block: + +.. code-block:: python + + # /// script + # dependencies = [ + # # A comment about using at least version 2.5.0 + # 'geojson>=2.5.0', # An inline comment + # # Note that TOML allows both single and double quotes for strings. + # + # # Install a package "fancyeo" from a (ZIP) source archive URL. + # "fancyeo @ https://github.com/fncy/fancyeo/archive/refs/tags/v3.2.0-alpha1.zip", + # # Or from a wheel URL, including a content hash to be verified before installing. + # "lousyeo @ https://example.com/lousyeo-6.6.6-py3-none-any.whl#sha1=4bbb3c72a9234ee998a6de940a148e346a", + # # Note that the last entry may have a trailing comma. + # ] + # /// + + +Verification +```````````` + +Use :py:func:`~openeo.udf.run_code.extract_udf_dependencies` to verify +that your metadata block can be parsed correctly: + +.. 
code-block:: pycon + + >>> from openeo.udf.run_code import extract_udf_dependencies + >>> extract_udf_dependencies(udf_code) + ['geojson>=2.5.0', + 'fancyeo @ https://github.com/fncy/fancyeo/archive/refs/tags/v3.2.0-alpha1.zip', + 'lousyeo @ https://example.com/lousyeo-6.6.6-py3-none-any.whl#sha1=4bbb3c72a9234ee998a6de940a148e346a'] + +If no valid metadata block is found, ``None`` will be returned. + +.. note:: + This function won't necessarily raise exceptions for syntax errors in the metadata block. + It might just fail to reliably detect anything and skip it as regular comment lines. + + +Ad-hoc dependency handling +--------------------------- + +If dependency handling through standardized UDF declarations is not supported by the backend, +there are still ways to manually handle additional dependencies in your UDF. +The exact details can vary between backends, but we can give some general pointers here: + +- Multiple Python dependencies can be packaged fairly easily by zipping a Python virtual environment. +- For some dependencies, it can be important that the Python major version of the virtual environment is the same as the one used by the backend. +- Python allows you to dynamically append (or prepend) libraries to the search path: ``sys.path.append("unzipped_virtualenv_location")`` + + + +Profile a process server-side +============================== + + +.. warning:: + Experimental feature - This feature only works on back-ends running the Geotrellis implementation, and has not yet been + adopted in the openEO API. + +Sometimes users want to 'profile' their UDF on the back-end. While it's recommended to first profile it offline, in the +same manner as you can debug UDF's, back-ends may support profiling directly. +Note that this will only generate statistics over the python part of the execution, therefore it is only suitable for profiling UDFs. + +Usage +------ + +Only batch jobs are supported! 
In order to turn on profiling, set 'profile' to 'true' in job options:: + + job_options={'profile':'true'} + ... # prepare the process + process.execute_batch('result.tif',job_options=job_options) + +When the process has finished, it will also download a file called 'profile_dumps.tar.gz': + +- ``rdd_-1.pstats`` is the profile data of the Python driver, +- the rest are the profiling results of the individual rdd id-s (that can be correlated with the execution using the SPARK UI). + +Viewing profiling information +------------------------------ + +The simplest way is to visualize the results with a graphical visualization tool called kcachegrind. +In order to do that, install `kcachegrind `_ packages (most Linux distributions have it installed by default) and its Python connector `pyprof2calltree `_. +From the command line, run:: + + pyprof2calltree rdd_.pstats. + +Another way is to use the builtin pstats functionality from within Python:: + + import pstats + p = pstats.Stats('restats') + p.print_stats() + +Example +------- + + +An example code can be found `here `_ . + + + +.. _udf_logging_with_inspect: + +Logging from a UDF +===================== + +From time to time, when things are not working as expected, +you may want to log some additional debug information from your UDF, inspect the data that is being processed, +or log warnings. +This can be done using the :py:class:`~openeo.udf.debug.inspect()` function. + +For example: to discover the shape of the data cube chunk that you receive in your UDF function: + +.. 
code-block:: python + :caption: Sample UDF code with ``inspect()`` logging + :emphasize-lines: 1, 5 + + from openeo.udf import inspect + import xarray + + def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray: + inspect(data=[cube.shape], message="UDF logging shape of my cube") + cube.values = 0.0001 * cube.values + return cube + +After the batch job is finished (or failed), you can find this information in the logs of the batch job. +For example (as explained at :ref:`batch-job-logs`), +use :py:class:`BatchJob.logs() ` in a Jupyter notebook session +to retrieve and filter the logs interactively: + +.. image:: _static/images/udf/logging_arrayshape.png + +Which reveals in this example a chunking shape of ``[3, 256, 256]``. + +.. note:: + + Not all kinds of data (types) are accepted/supported by the ``data`` argument of :py:class:`~openeo.udf.debug.inspect`, + so you might have to experiment a bit to make sure the desired debug information is logged as desired. + + +.. _old_udf_api: + +``openeo.UDF`` API and usage changes in version 0.13.0 +======================================================== + +Prior to version 0.13.0 of the openEO Python Client Library, +loading and working with UDFs was a bit inconsistent and cumbersome. + +- The old ``openeo.UDF()`` required an explicit ``runtime`` argument, which was usually ``"Python"``. + In the new :py:class:`openeo.UDF `, the ``runtime`` argument is optional, + and it will be auto-detected (from the source code or file extension) when not given. +- The old ``openeo.UDF()`` required an explicit ``data`` argument, and figuring out the correct + value (e.g. something like ``{"from_parameter": "x"}``) required good knowledge of the openEO API and processes. + With the new :py:class:`openeo.UDF ` it is not necessary anymore to provide + the ``data`` argument. In fact, while the ``data`` argument is only still there for compatibility reasons, + it is unused and it will be removed in a future version. 
+ A deprecation warning will be triggered when ``data`` is given a value. +- :py:meth:`DataCube.apply_dimension() ` has direct UDF support through + ``code`` and ``runtime`` arguments, preceding the more generic and standard ``process`` argument, while + comparable methods like :py:meth:`DataCube.apply() ` + or :py:meth:`DataCube.reduce_dimension() ` + only support a ``process`` argument with no dedicated arguments for UDFs. + + The goal is to improve uniformity across all these methods and use a generic ``process`` argument everywhere + (that also supports a :py:class:`openeo.UDF ` object for UDF use cases). + For now, the ``code``, ``runtime`` and ``version`` arguments are still present + in :py:meth:`DataCube.apply_dimension() ` + as before, but usage is deprecated. + + Simple example to sum it up: + + .. code-block:: python + + udf_code = """ + ... + def apply_datacube(cube, ... + """ + + # Legacy `apply_dimension` usage: still works for now, + # but it will trigger a deprecation warning. + cube.apply_dimension(code=udf_code, runtime="Python", dimension="t") + + # New, preferred approach with a standard `process` argument. + udf = openeo.UDF(udf_code) + cube.apply_dimension(process=udf, dimension="t") + + # Unchanged: usage of other apply/reduce/... methods + cube.apply(process=udf) + cube.reduce_dimension(reducer=udf, dimension="t") diff --git a/_sources/udp.rst.txt b/_sources/udp.rst.txt new file mode 100644 index 000000000..40870a09f --- /dev/null +++ b/_sources/udp.rst.txt @@ -0,0 +1,529 @@ +.. _user-defined-processes: + +############################ +User-Defined Processes (UDP) +############################ + + +Code reuse with user-defined processes +======================================= + +As explained before, processes can be chained together in a process graph +to build a certain algorithm. +Often, you have certain (sub)chains that reoccur in the same process graph +or even in different process graphs or algorithms. 
+ +The openEO API enables you to store such (sub)chains +on the back-end as a so called **user-defined process**. +This allows you to build your own *library of reusable building blocks*. + +.. warning:: + + Do not confuse **user-defined processes** (sometimes abbreviated as UDP) with + **user-defined functions** (UDF) in openEO, which is a mechanism to + inject Python or R scripts as process nodes in a process graph. + See :ref:`user-defined-functions` for more information. + +A user-defined process can not only be constructed from +pre-defined processes provided by the back-end, +but also other user-defined processes. + +Ultimately, the openEO API allows you to publicly expose your user-defined process, +so that other users can invoke it as a service. +This turns your openEO process into a web application +that can be executed using the regular openEO +support for synchronous and asynchronous jobs. + + +Process Parameters +==================== + +User-defined processes are usually **parameterized**, +meaning certain inputs are expected when calling the process. + +For example, if you often have to convert Fahrenheit to Celsius:: + + c = (f - 32) / 1.8 + +you could define a user-defined process ``fahrenheit_to_celsius``, +consisting of two simple mathematical operations +(pre-defined processes ``subtract`` and ``divide``). + +We can represent this in openEO's JSON based format as follows +(don't worry too much about the syntax details of this representation, +the openEO Python client will hide this usually):: + + + { + "subtract32": { + "process_id": "subtract", + "arguments": {"x": {"from_parameter": "fahrenheit"}, "y": 32} + }, + "divide18": { + "process_id": "divide", + "arguments": {"x": {"from_node": "subtract32"}, "y": 1.8}, + "result": true + } + } + + +The important point here is the parameter reference ``{"from_parameter": "fahrenheit"}`` in the subtraction. +When we call this user-defined process we will have to provide a Fahrenheit value. 
+For example with 70 degrees Fahrenheit (again in openEO JSON format here):: + + { + "process_id": "fahrenheit_to_celsius", + "arguments": {"fahrenheit": 70} + } + + +.. _udp-declaring-parameters: + +Declaring Parameters +--------------------- + +It's good style to declare what parameters your user-defined process expects and supports. +It allows you to document your parameters, define the data type(s) you expect +(the "schema" in openEO-speak) and define default values. + +The openEO Python client lets you define parameters as +:class:`~openeo.api.process.Parameter` instances. +In general you have to specify at least the parameter name, +a description and a schema (to declare the expected parameter type). +The "fahrenheit" parameter from the example above can be defined like this:: + + from openeo.api.process import Parameter + + fahrenheit_param = Parameter( + name="fahrenheit", + description="Degrees Fahrenheit", + schema={"type": "number"} + ) + +To simplify working with parameter schemas, the :class:`~openeo.api.process.Parameter` class +provides a couple of helpers to create common types of parameters. +In the example above, the "fahrenheit" parameter (a number) can also be created more compactly +with the :py:meth:`Parameter.number() ` helper:: + + fahrenheit_param = Parameter.number( + name="fahrenheit", description="Degrees Fahrenheit" + ) + +Some useful parameter helpers (class methods of the :py:class:`~openeo.api.process.Parameter` class): + +- :py:meth:`Parameter.string() ` + to create a string parameter, + e.g. to parameterize the collection id in a ``load_collection`` call in your UDP. +- :py:meth:`Parameter.integer() `, + :py:meth:`Parameter.number() `, + and :py:meth:`Parameter.boolean() ` + to create integer, floating point, or boolean parameters respectively. +- :py:meth:`Parameter.array() ` + to create an array parameter, + e.g. to parameterize a band selection in a ``load_collection`` call in your UDP. 
+- :py:meth:`Parameter.datacube() ` + (or its legacy, deprecated cousin :py:meth:`Parameter.raster_cube() `) + to create a data cube parameter. +- :py:meth:`Parameter.bounding_box() ` to create + a parameter for specifying a spatial extent with "west", "south", "east", "north" bounds. +- :py:meth:`Parameter.date() ` and + :py:meth:`Parameter.date_time() ` + to create date or date+time parameters. +- :py:meth:`Parameter.temporal_interval() ` to create + a parameter for specifying a temporal interval with "start" and "end" dates. +- :py:meth:`Parameter.geojson() ` to create + a parameter for specifying a GeoJSON geometry. +- :py:meth:`Parameter.spatial_extent() ` to create + a spatial_extent parameter that is exactly the same as the corresponding parameter in ``load_collection`` and ``load_stac``. + + + +Consult the documentation of these helper class methods for additional features. +For example, declaring a default value for an integer parameter:: + + size_param = Parameter.integer( + name="size", description="Kernel size", default=4 + ) + + + +More advanced parameter schemas +-------------------------------- + +While the helper class methods of :py:class:`~openeo.api.process.Parameter` (discussed above) +cover the most common parameter usage, +you also might need to declare some parameters with a more special or specific schema. +You can do that through the ``schema`` argument +of the basic :py:class:`~openeo.api.process.Parameter()` constructor. +This "schema" argument follows the `JSON Schema draft-07 `_ specification, +which we will briefly illustrate here. + +Basic primitives can be declared through a (required) "type" field, for example: +``{"type": "string"}`` for strings, ``{"type": "integer"}`` for integers, etc. + +Likewise, arrays can be defined with a minimal ``{"type": "array"}``. +In addition, the expected type of the array items can also be specified, +e.g. 
an array of integers:: + + { + "type": "array", + "items": {"type": "integer"} + } + +Another, more complex type is ``{"type": "object"}`` for parameters +that are like Python dictionaries (or mappings). +For example, to define a bounding box parameter +that should contain certain fields with certain types:: + + { + "type": "object", + "properties": { + "west": {"type": "number"}, + "south": {"type": "number"}, + "east": {"type": "number"}, + "north": {"type": "number"}, + "crs": {"type": "string"} + } + } + +Check the documentation and examples of `JSON Schema draft-07 <https://json-schema.org/specification-links#draft-7>`_ +for even more features. + +On top of these generic types, the openEO API also defines a couple of custom (sub)types +in the `openeo-processes project <https://github.com/Open-EO/openeo-processes>`_ +(see the ``meta/subtype-schemas.json`` listing). +For example, the schema of an openEO data cube is:: + + { + "type": "object", + "subtype": "datacube" + } + + + +.. _build_and_store_udp: + +Building and storing user-defined process +============================================= + +There are a couple of ways to build and store user-defined processes: + +- using predefined :ref:`process functions <create_udp_through_process_functions>` +- :ref:`parameterized building of a data cube <create_udp_parameterized_cube>` +- :ref:`directly from a well-formatted dictionary <create_udp_from_dict>` process graph representation + + + +.. _create_udp_through_process_functions: + +Through "process functions" +---------------------------- + +The openEO Python Client Library defines the +official processes in the :py:mod:`openeo.processes` module, +which can be used to build a process graph as follows:: + + from openeo.processes import subtract, divide + from openeo.api.process import Parameter + + # Define the input parameter. + f = Parameter.number("f", description="Degrees Fahrenheit.") + + # Do the calculations, using the parameter and other values + fahrenheit_to_celsius = divide(x=subtract(x=f, y=32), y=1.8) + + # Store user-defined process in openEO back-end.
+ connection.save_user_defined_process( + "fahrenheit_to_celsius", + fahrenheit_to_celsius, + parameters=[f] + ) + + +The ``fahrenheit_to_celsius`` object encapsulates the subtract and divide calculations in a symbolic way. +We can pass it directly to :py:meth:`~openeo.rest.connection.Connection.save_user_defined_process`. + + +If you want to inspect its openEO-style process graph representation, +use the :meth:`~openeo.rest.datacube.DataCube.to_json()` +or :meth:`~openeo.rest.datacube.DataCube.print_json()` method:: + + >>> fahrenheit_to_celsius.print_json() + { + "process_graph": { + "subtract1": { + "process_id": "subtract", + "arguments": { + "x": { + "from_parameter": "f" + }, + "y": 32 + } + }, + "divide1": { + "process_id": "divide", + "arguments": { + "x": { + "from_node": "subtract1" + }, + "y": 1.8 + }, + "result": true + } + } + } + + +.. _create_udp_parameterized_cube: + +From a parameterized data cube +------------------------------- + +It's also possible to work with a :class:`~openeo.rest.datacube.DataCube` directly +and parameterize it. +Let's create, as a simple but functional example, a custom ``load_collection`` +with hardcoded collection id and band name +and a parameterized spatial extent (with default):: + + spatial_extent = Parameter( + name="bbox", + schema="object", + default={"west": 3.7, "south": 51.03, "east": 3.75, "north": 51.05} + ) + + cube = connection.load_collection( + "SENTINEL2_L2A_SENTINELHUB", + spatial_extent=spatial_extent, + bands=["B04"] + ) + +Note how we just can pass :class:`~openeo.api.process.Parameter` objects as arguments +while building a :class:`~openeo.rest.datacube.DataCube`. + +.. note:: + + Not all :class:`~openeo.rest.datacube.DataCube` methods/processes properly support + :class:`~openeo.api.process.Parameter` arguments. + Please submit a bug report when you encounter missing or wrong parameterization support. 
+ +We can now store this as a user-defined process called "fancy_load_collection" on the back-end:: + + connection.save_user_defined_process( + "fancy_load_collection", + cube, + parameters=[spatial_extent] + ) + +If you want to inspect its openEO-style process graph representation, +use the :meth:`~openeo.rest.datacube.DataCube.to_json()` +or :meth:`~openeo.rest.datacube.DataCube.print_json()` method:: + + >>> cube.print_json() + { + "loadcollection1": { + "process_id": "load_collection", + "arguments": { + "id": "SENTINEL2_L2A_SENTINELHUB", + "bands": [ + "B04" + ], + "spatial_extent": { + "from_parameter": "bbox" + }, + "temporal_extent": null + }, + "result": true + } + } + + + +.. _create_udp_from_dict: + +Using a predefined dictionary +------------------------------ + +In some (advanced) situations, you might already have +the process graph in dictionary format +(or JSON format, which is very close and easy to transform). +For example, another developer already prepared it for you, +or you prefer to fine-tune process graphs in a JSON editor. +It is very straightforward to submit this as a user-defined process.
+ +Say we start from the following Python dictionary, +representing the Fahrenheit to Celsius conversion we discussed before:: + + fahrenheit_to_celsius = { + "subtract1": { + "process_id": "subtract", + "arguments": {"x": {"from_parameter": "f"}, "y": 32} + }, + "divide1": { + "process_id": "divide", + "arguments": {"x": {"from_node": "subtract1"}, "y": 1.8}, + "result": True + }} + +We can store this directly, taking into account that we have to define +a parameter named ``f`` corresponding with the ``{"from_parameter": "f"}`` argument +from the dictionary above:: + + connection.save_user_defined_process( + user_defined_process_id="fahrenheit_to_celsius", + process_graph=fahrenheit_to_celsius, + parameters=[Parameter.number(name="f", description="Degrees Fahrenheit")] + ) + + +Store to a file +--------------- + +Some use cases might require storing the user-defined process in, +for example, a JSON file instead of storing it directly on a back-end. +Use :py:func:`~openeo.rest.udp.build_process_dict` to build a dictionary +compatible with the "process graph with metadata" format of the openEO API +and dump it in JSON format to a file:: + + import json + from openeo.rest.udp import build_process_dict + from openeo.processes import subtract, divide + from openeo.api.process import Parameter + + fahrenheit = Parameter.number("f", description="Degrees Fahrenheit.") + fahrenheit_to_celsius = divide(x=subtract(x=fahrenheit, y=32), y=1.8) + + spec = build_process_dict( + process_id="fahrenheit_to_celsius", + process_graph=fahrenheit_to_celsius, + parameters=[fahrenheit] + ) + + with open("fahrenheit_to_celsius.json", "w") as f: + json.dump(spec, f, indent=2) + +This results in a JSON file like this:: + + { + "id": "fahrenheit_to_celsius", + "process_graph": { + "subtract1": { + "process_id": "subtract", + ... + "parameters": [ + { + "name": "f", + ... + + +.. 
_evaluate_udp: + +Evaluate user-defined processes +================================ + +Let's evaluate the user-defined processes we defined. + +Because there is no pre-defined +wrapper function for our user-defined process, we use the +generic :func:`openeo.processes.process` function to build a simple +process graph that calls our ``fahrenheit_to_celsius`` process:: + + >>> pg = openeo.processes.process("fahrenheit_to_celsius", f=70) + >>> pg.print_json(indent=None) + {"process_graph": {"fahrenheittocelsius1": {"process_id": "fahrenheit_to_celsius", "arguments": {"f": 70}, "result": true}}} + + >>> res = connection.execute(pg) + >>> print(res) + 21.11111111111111 + + +To use our custom ``fancy_load_collection`` process, +we only have to specify a temporal extent, +and let the predefined and default values do their work. +We will use :func:`~openeo.rest.connection.Connection.datacube_from_process` +to construct a :class:`~openeo.rest.datacube.DataCube` object +which we can process further and download:: + + cube = connection.datacube_from_process("fancy_load_collection") + cube = cube.filter_temporal("2020-09-01", "2020-09-10") + cube.download("fancy.tiff", format="GTiff") + +See :ref:`datacube_from_process` for more information on :func:`~openeo.rest.connection.Connection.datacube_from_process`. + + +.. _udp_example_evi: + +UDP Example: EVI timeseries +========================================== + +In this UDP example, we'll build a reusable UDP ``evi_timeseries`` +to calculate the EVI timeseries for a given geometry. +It's a simplified version of the EVI workflow laid out in :ref:`basic_example_evi_map_and_timeseries`, +focussing on the UDP-specific aspects: defining and using parameters; +building, storing, and finally executing the UDP. + +.. 
code-block:: python + + import openeo + from openeo.api.process import Parameter + + # Create connection to openEO back-end + connection = openeo.connect("...").authenticate_oidc() + + # Declare the UDP parameters + temporal_extent = Parameter( + name="temporal_extent", + description="The date range to calculate the EVI for.", + schema={"type": "array", "subtype": "temporal-interval"}, + default=["2018-06-15", "2018-06-27"] + ) + geometry = Parameter( + name="geometry", + description="The geometry (a single (multi)polygon or a feature collection of (multi)polygons) to calculate the EVI for.", + schema={"type": "object", "subtype": "geojson"} + ) + + # Load raw SENTINEL2_L2A data + sentinel2_cube = connection.load_collection( + "SENTINEL2_L2A", + temporal_extent=temporal_extent, + bands=["B02", "B04", "B08"], + ) + + # Extract spectral bands and calculate EVI with the "band math" feature + blue = sentinel2_cube.band("B02") * 0.0001 + red = sentinel2_cube.band("B04") * 0.0001 + nir = sentinel2_cube.band("B08") * 0.0001 + evi = 2.5 * (nir - red) / (nir + 6.0 * red - 7.5 * blue + 1.0) + + evi_aggregation = evi.aggregate_spatial( + geometries=geometry, + reducer="mean", + ) + + # Store the parameterized user-defined process at openEO back-end. + process_id = "evi_timeseries" + connection.save_user_defined_process( + user_defined_process_id=process_id, + process_graph=evi_aggregation, + parameters=[temporal_extent, geometry], + ) + +When this UDP ``evi_timeseries`` is successfully stored on the back-end, +we can use it through :func:`~openeo.rest.connection.Connection.datacube_from_process` +to get the EVI timeseries of a desired geometry and time window: + +..
code-block:: python + + time_window = ["2020-01-01", "2021-12-31"] + geometry = { + "type": "Polygon", + "coordinates": [[[5.1793, 51.2498], [5.1787, 51.2467], [5.1852, 51.2450], [5.1867, 51.2453], [5.1873, 51.2491], [5.1793, 51.2498]]], + } + + evi_timeseries = connection.datacube_from_process( + process_id="evi_timeseries", + temporal_extent=time_window, + geometry=geometry, + ) + + evi_timeseries.download("evi-aggregation.json") diff --git a/_static/alabaster.css b/_static/alabaster.css new file mode 100644 index 000000000..bf03222f7 --- /dev/null +++ b/_static/alabaster.css @@ -0,0 +1,663 @@ +/* -- page layout ----------------------------------------------------------- */ + +body { + font-family: Cantarell, Georgia, serif; + font-size: 17px; + background-color: #fff; + color: #000; + margin: 0; + padding: 0; +} + + +div.document { + width: 1200px; + margin: 30px auto 0 auto; +} + +div.documentwrapper { + float: left; + width: 100%; +} + +div.bodywrapper { + margin: 0 0 0 300px; +} + +div.sphinxsidebar { + width: 300px; + font-size: 14px; + line-height: 1.5; +} + +hr { + border: 1px solid #B1B4B6; +} + +div.body { + background-color: #fff; + color: #3E4349; + padding: 0 30px 0 30px; +} + +div.body > .section { + text-align: left; +} + +div.footer { + width: 1200px; + margin: 20px auto 30px auto; + font-size: 14px; + color: #888; + text-align: right; +} + +div.footer a { + color: #888; +} + +p.caption { + font-family: inherit; + font-size: inherit; +} + + +div.relations { + display: none; +} + + +div.sphinxsidebar { + max-height: 100%; + overflow-y: auto; +} + +div.sphinxsidebar a { + color: #444; + text-decoration: none; + border-bottom: 1px dotted #999; +} + +div.sphinxsidebar a:hover { + border-bottom: 1px solid #999; +} + +div.sphinxsidebarwrapper { + padding: 18px 10px; +} + +div.sphinxsidebarwrapper p.logo { + padding: 0; + margin: -10px 0 0 0px; + text-align: center; +} + +div.sphinxsidebarwrapper h1.logo { + margin-top: -10px; + text-align: center; + 
margin-bottom: 5px; + text-align: left; +} + +div.sphinxsidebarwrapper h1.logo-name { + margin-top: 0px; +} + +div.sphinxsidebarwrapper p.blurb { + margin-top: 0; + font-style: normal; +} + +div.sphinxsidebar h3, +div.sphinxsidebar h4 { + font-family: Cantarell, Georgia, serif; + color: #444; + font-size: 24px; + font-weight: normal; + margin: 0 0 5px 0; + padding: 0; +} + +div.sphinxsidebar h4 { + font-size: 20px; +} + +div.sphinxsidebar h3 a { + color: #444; +} + +div.sphinxsidebar p.logo a, +div.sphinxsidebar h3 a, +div.sphinxsidebar p.logo a:hover, +div.sphinxsidebar h3 a:hover { + border: none; +} + +div.sphinxsidebar p { + color: #555; + margin: 10px 0; +} + +div.sphinxsidebar ul { + margin: 10px 0; + padding: 0; + color: #000; +} + +div.sphinxsidebar ul li.toctree-l1 > a { + font-size: 120%; +} + +div.sphinxsidebar ul li.toctree-l2 > a { + font-size: 110%; +} + +div.sphinxsidebar input { + border: 1px solid #CCC; + font-family: Cantarell, Georgia, serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox { + margin: 1em 0; +} + +div.sphinxsidebar .search > div { + display: table-cell; +} + +div.sphinxsidebar hr { + border: none; + height: 1px; + color: #AAA; + background: #AAA; + + text-align: left; + margin-left: 0; + width: 50%; +} + +div.sphinxsidebar .badge { + border-bottom: none; +} + +div.sphinxsidebar .badge:hover { + border-bottom: none; +} + +/* To address an issue with donation coming after search */ +div.sphinxsidebar h3.donation { + margin-top: 10px; +} + +/* -- body styles ----------------------------------------------------------- */ + +a { + color: #004B6B; + text-decoration: underline; +} + +a:hover { + color: #6D4100; + text-decoration: underline; +} + +div.body h1, +div.body h2, +div.body h3, +div.body h4, +div.body h5, +div.body h6 { + font-family: Cantarell, Georgia, serif; + font-weight: normal; + margin: 30px 0px 10px 0px; + padding: 0; +} + +div.body h1 { margin-top: 0; padding-top: 0; font-size: 240%; } +div.body h2 { font-size: 
180%; } +div.body h3 { font-size: 150%; } +div.body h4 { font-size: 130%; } +div.body h5 { font-size: 100%; } +div.body h6 { font-size: 100%; } + +a.headerlink { + color: #DDD; + padding: 0 4px; + text-decoration: none; +} + +a.headerlink:hover { + color: #444; + background: #EAEAEA; +} + +div.body p, div.body dd, div.body li { + line-height: 1.4em; +} + +div.admonition { + margin: 20px 0px; + padding: 10px 30px; + background-color: #EEE; + border: 1px solid #CCC; +} + +div.admonition tt.xref, div.admonition code.xref, div.admonition a tt { + background-color: #FBFBFB; + border-bottom: 1px solid #fafafa; +} + +div.admonition p.admonition-title { + font-family: Cantarell, Georgia, serif; + font-weight: normal; + font-size: 24px; + margin: 0 0 10px 0; + padding: 0; + line-height: 1; +} + +div.admonition p.last { + margin-bottom: 0; +} + +dt:target, .highlight { + background: #FAF3E8; +} + +div.warning { + background-color: #FCC; + border: 1px solid #FAA; +} + +div.danger { + background-color: #FCC; + border: 1px solid #FAA; + -moz-box-shadow: 2px 2px 4px #D52C2C; + -webkit-box-shadow: 2px 2px 4px #D52C2C; + box-shadow: 2px 2px 4px #D52C2C; +} + +div.error { + background-color: #FCC; + border: 1px solid #FAA; + -moz-box-shadow: 2px 2px 4px #D52C2C; + -webkit-box-shadow: 2px 2px 4px #D52C2C; + box-shadow: 2px 2px 4px #D52C2C; +} + +div.caution { + background-color: #FCC; + border: 1px solid #FAA; +} + +div.attention { + background-color: #FCC; + border: 1px solid #FAA; +} + +div.important { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.note { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.tip { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.hint { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.seealso { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.topic { + background-color: #EEE; +} + +p.admonition-title { + display: inline; +} + +p.admonition-title:after { + content: ":"; +} + +pre, tt, 
code { + font-family: 'Liberation Mono', 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; + font-size: 0.9em; +} + +.hll { + background-color: #FFC; + margin: 0 -12px; + padding: 0 12px; + display: block; +} + +img.screenshot { +} + +tt.descname, tt.descclassname, code.descname, code.descclassname { + font-size: 0.95em; +} + +tt.descname, code.descname { + padding-right: 0.08em; +} + +img.screenshot { + -moz-box-shadow: 2px 2px 4px #EEE; + -webkit-box-shadow: 2px 2px 4px #EEE; + box-shadow: 2px 2px 4px #EEE; +} + +table.docutils { + border: 1px solid #888; + -moz-box-shadow: 2px 2px 4px #EEE; + -webkit-box-shadow: 2px 2px 4px #EEE; + box-shadow: 2px 2px 4px #EEE; +} + +table.docutils td, table.docutils th { + border: 1px solid #888; + padding: 0.25em 0.7em; +} + +table.field-list, table.footnote { + border: none; + -moz-box-shadow: none; + -webkit-box-shadow: none; + box-shadow: none; +} + +table.footnote { + margin: 15px 0; + width: 100%; + border: 1px solid #EEE; + background: #FDFDFD; + font-size: 0.9em; +} + +table.footnote + table.footnote { + margin-top: -15px; + border-top: none; +} + +table.field-list th { + padding: 0 0.8em 0 0; +} + +table.field-list td { + padding: 0; +} + +table.field-list p { + margin-bottom: 0.8em; +} + +/* Cloned from + * https://github.com/sphinx-doc/sphinx/commit/ef60dbfce09286b20b7385333d63a60321784e68 + */ +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + +table.footnote td.label { + width: .1px; + padding: 0.3em 0 0.3em 0.5em; +} + +table.footnote td { + padding: 0.3em 0.5em; +} + +dl { + margin-left: 0; + margin-right: 0; + margin-top: 0; + padding: 0; +} + +dl dd { + margin-left: 30px; +} + +blockquote { + margin: 0 0 0 30px; + padding: 0; +} + +ul, ol { + /* Matches the 30px from the narrow-screen "li > ul" selector below */ + margin: 10px 0 10px 30px; + padding: 0; +} + +pre { + background: unset; + padding: 7px 30px; + margin: 15px 
0px; + line-height: 1.3em; +} + +div.viewcode-block:target { + background: #ffd; +} + +dl pre, blockquote pre, li pre { + margin-left: 0; + padding-left: 30px; +} + +tt, code { + background-color: #ecf0f3; + color: #222; + /* padding: 1px 2px; */ +} + +tt.xref, code.xref, a tt { + background-color: #FBFBFB; + border-bottom: 1px solid #fff; +} + +a.reference { + text-decoration: none; + border-bottom: 1px dotted #004B6B; +} + +a.reference:hover { + border-bottom: 1px solid #6D4100; +} + +/* Don't put an underline on images */ +a.image-reference, a.image-reference:hover { + border-bottom: none; +} + +a.footnote-reference { + text-decoration: none; + font-size: 0.7em; + vertical-align: top; + border-bottom: 1px dotted #004B6B; +} + +a.footnote-reference:hover { + border-bottom: 1px solid #6D4100; +} + +a:hover tt, a:hover code { + background: #EEE; +} + +@media screen and (max-width: 1200px) { + + body { + margin: 0; + padding: 20px 30px; + } + + div.documentwrapper { + float: none; + background: #fff; + margin-left: 0; + margin-top: 0; + margin-right: 0; + margin-bottom: 0; + } + + div.sphinxsidebar { + display: block; + float: none; + width: unset; + margin: 50px -30px -20px -30px; + padding: 10px 20px; + background: #333; + color: #FFF; + } + + div.sphinxsidebar h3, div.sphinxsidebar h4, div.sphinxsidebar p, + div.sphinxsidebar h3 a { + color: #fff; + } + + div.sphinxsidebar a { + color: #AAA; + } + + div.sphinxsidebar p.logo { + display: none; + } + + div.document { + width: 100%; + margin: 0; + } + + div.footer { + display: none; + } + + div.bodywrapper { + margin: 0; + } + + div.body { + min-height: 0; + min-width: auto; /* fixes width on small screens, breaks .hll */ + padding: 0; + } + + .hll { + /* "fixes" the breakage */ + width: max-content; + } + + .rtd_doc_footer { + display: none; + } + + .document { + width: auto; + } + + .footer { + width: auto; + } + + .github { + display: none; + } + + ul { + margin-left: 0; + } + + li > ul { + /* Matches the 30px 
from the "ul, ol" selector above */ + margin-left: 30px; + } +} + + +/* misc. */ + +.revsys-inline { + display: none!important; +} + +/* Hide ugly table cell borders in ..bibliography:: directive output */ +table.docutils.citation, table.docutils.citation td, table.docutils.citation th { + border: none; + /* Below needed in some edge cases; if not applied, bottom shadows appear */ + -moz-box-shadow: none; + -webkit-box-shadow: none; + box-shadow: none; +} + + +/* relbar */ + +.related { + line-height: 30px; + width: 100%; + font-size: 0.9rem; +} + +.related.top { + border-bottom: 1px solid #EEE; + margin-bottom: 20px; +} + +.related.bottom { + border-top: 1px solid #EEE; +} + +.related ul { + padding: 0; + margin: 0; + list-style: none; +} + +.related li { + display: inline; +} + +nav#rellinks { + float: right; +} + +nav#rellinks li+li:before { + content: "|"; +} + +nav#breadcrumbs li+li:before { + content: "\00BB"; +} + +/* Hide certain items when printing */ +@media print { + div.related { + display: none; + } +} + +img.github { + position: absolute; + top: 0; + border: 0; + right: 0; +} \ No newline at end of file diff --git a/_static/basic.css b/_static/basic.css new file mode 100644 index 000000000..d9846dacb --- /dev/null +++ b/_static/basic.css @@ -0,0 +1,914 @@ +/* + * Sphinx stylesheet -- basic theme. 
+ */ + +/* -- main layout ----------------------------------------------------------- */ + +div.clearer { + clear: both; +} + +div.section::after { + display: block; + content: ''; + clear: left; +} + +/* -- relbar ---------------------------------------------------------------- */ + +div.related { + width: 100%; + font-size: 90%; +} + +div.related h3 { + display: none; +} + +div.related ul { + margin: 0; + padding: 0 0 0 10px; + list-style: none; +} + +div.related li { + display: inline; +} + +div.related li.right { + float: right; + margin-right: 5px; +} + +/* -- sidebar --------------------------------------------------------------- */ + +div.sphinxsidebarwrapper { + padding: 10px 5px 0 10px; +} + +div.sphinxsidebar { + float: left; + width: 230px; + margin-left: -100%; + font-size: 90%; + word-wrap: break-word; + overflow-wrap : break-word; +} + +div.sphinxsidebar ul { + list-style: none; +} + +div.sphinxsidebar ul ul, +div.sphinxsidebar ul.want-points { + margin-left: 20px; + list-style: square; +} + +div.sphinxsidebar ul ul { + margin-top: 0; + margin-bottom: 0; +} + +div.sphinxsidebar form { + margin-top: 10px; +} + +div.sphinxsidebar input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox form.search { + overflow: hidden; +} + +div.sphinxsidebar #searchbox input[type="text"] { + float: left; + width: 80%; + padding: 0.25em; + box-sizing: border-box; +} + +div.sphinxsidebar #searchbox input[type="submit"] { + float: left; + width: 20%; + border-left: none; + padding: 0.25em; + box-sizing: border-box; +} + + +img { + border: 0; + max-width: 100%; +} + +/* -- search page ----------------------------------------------------------- */ + +ul.search { + margin-top: 10px; +} + +ul.search li { + padding: 5px 0; +} + +ul.search li a { + font-weight: bold; +} + +ul.search li p.context { + color: #888; + margin: 2px 0 0 30px; + text-align: left; +} + +ul.keywordmatches li.goodmatch a { + font-weight: bold; +} 
+ +/* -- index page ------------------------------------------------------------ */ + +table.contentstable { + width: 90%; + margin-left: auto; + margin-right: auto; +} + +table.contentstable p.biglink { + line-height: 150%; +} + +a.biglink { + font-size: 1.3em; +} + +span.linkdescr { + font-style: italic; + padding-top: 5px; + font-size: 90%; +} + +/* -- general index --------------------------------------------------------- */ + +table.indextable { + width: 100%; +} + +table.indextable td { + text-align: left; + vertical-align: top; +} + +table.indextable ul { + margin-top: 0; + margin-bottom: 0; + list-style-type: none; +} + +table.indextable > tbody > tr > td > ul { + padding-left: 0em; +} + +table.indextable tr.pcap { + height: 10px; +} + +table.indextable tr.cap { + margin-top: 10px; + background-color: #f2f2f2; +} + +img.toggler { + margin-right: 3px; + margin-top: 3px; + cursor: pointer; +} + +div.modindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +div.genindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +/* -- domain module index --------------------------------------------------- */ + +table.modindextable td { + padding: 2px; + border-collapse: collapse; +} + +/* -- general body styles --------------------------------------------------- */ + +div.body { + min-width: inherit; + max-width: 800px; +} + +div.body p, div.body dd, div.body li, div.body blockquote { + -moz-hyphens: auto; + -ms-hyphens: auto; + -webkit-hyphens: auto; + hyphens: auto; +} + +a.headerlink { + visibility: hidden; +} + +a:visited { + color: #551A8B; +} + +h1:hover > a.headerlink, +h2:hover > a.headerlink, +h3:hover > a.headerlink, +h4:hover > a.headerlink, +h5:hover > a.headerlink, +h6:hover > a.headerlink, +dt:hover > a.headerlink, +caption:hover > a.headerlink, +p.caption:hover > a.headerlink, +div.code-block-caption:hover > 
a.headerlink { + visibility: visible; +} + +div.body p.caption { + text-align: inherit; +} + +div.body td { + text-align: left; +} + +.first { + margin-top: 0 !important; +} + +p.rubric { + margin-top: 30px; + font-weight: bold; +} + +img.align-left, figure.align-left, .figure.align-left, object.align-left { + clear: left; + float: left; + margin-right: 1em; +} + +img.align-right, figure.align-right, .figure.align-right, object.align-right { + clear: right; + float: right; + margin-left: 1em; +} + +img.align-center, figure.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +img.align-default, figure.align-default, .figure.align-default { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left; +} + +.align-center { + text-align: center; +} + +.align-default { + text-align: center; +} + +.align-right { + text-align: right; +} + +/* -- sidebars -------------------------------------------------------------- */ + +div.sidebar, +aside.sidebar { + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px; + background-color: #ffe; + width: 40%; + float: right; + clear: right; + overflow-x: auto; +} + +p.sidebar-title { + font-weight: bold; +} + +nav.contents, +aside.topic, +div.admonition, div.topic, blockquote { + clear: left; +} + +/* -- topics ---------------------------------------------------------------- */ + +nav.contents, +aside.topic, +div.topic { + border: 1px solid #ccc; + padding: 7px; + margin: 10px 0 10px 0; +} + +p.topic-title { + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; +} + +/* -- admonitions ----------------------------------------------------------- */ + +div.admonition { + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; +} + +div.admonition dt { + font-weight: bold; +} + +p.admonition-title { + margin: 0px 10px 5px 0px; + font-weight: bold; +} + +div.body p.centered { + text-align: center; + margin-top: 
25px; +} + +/* -- content of sidebars/topics/admonitions -------------------------------- */ + +div.sidebar > :last-child, +aside.sidebar > :last-child, +nav.contents > :last-child, +aside.topic > :last-child, +div.topic > :last-child, +div.admonition > :last-child { + margin-bottom: 0; +} + +div.sidebar::after, +aside.sidebar::after, +nav.contents::after, +aside.topic::after, +div.topic::after, +div.admonition::after, +blockquote::after { + display: block; + content: ''; + clear: both; +} + +/* -- tables ---------------------------------------------------------------- */ + +table.docutils { + margin-top: 10px; + margin-bottom: 10px; + border: 0; + border-collapse: collapse; +} + +table.align-center { + margin-left: auto; + margin-right: auto; +} + +table.align-default { + margin-left: auto; + margin-right: auto; +} + +table caption span.caption-number { + font-style: italic; +} + +table caption span.caption-text { +} + +table.docutils td, table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 0; + border-bottom: 1px solid #aaa; +} + +th { + text-align: left; + padding-right: 5px; +} + +table.citation { + border-left: solid 1px gray; + margin-left: 1px; +} + +table.citation td { + border-bottom: none; +} + +th > :first-child, +td > :first-child { + margin-top: 0px; +} + +th > :last-child, +td > :last-child { + margin-bottom: 0px; +} + +/* -- figures --------------------------------------------------------------- */ + +div.figure, figure { + margin: 0.5em; + padding: 0.5em; +} + +div.figure p.caption, figcaption { + padding: 0.3em; +} + +div.figure p.caption span.caption-number, +figcaption span.caption-number { + font-style: italic; +} + +div.figure p.caption span.caption-text, +figcaption span.caption-text { +} + +/* -- field list styles ----------------------------------------------------- */ + +table.field-list td, table.field-list th { + border: 0 !important; +} + +.field-list ul { + margin: 0; + padding-left: 1em; +} 
+ +.field-list p { + margin: 0; +} + +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + +/* -- hlist styles ---------------------------------------------------------- */ + +table.hlist { + margin: 1em 0; +} + +table.hlist td { + vertical-align: top; +} + +/* -- object description styles --------------------------------------------- */ + +.sig { + font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; +} + +.sig-name, code.descname { + background-color: transparent; + font-weight: bold; +} + +.sig-name { + font-size: 1.1em; +} + +code.descname { + font-size: 1.2em; +} + +.sig-prename, code.descclassname { + background-color: transparent; +} + +.optional { + font-size: 1.3em; +} + +.sig-paren { + font-size: larger; +} + +.sig-param.n { + font-style: italic; +} + +/* C++ specific styling */ + +.sig-inline.c-texpr, +.sig-inline.cpp-texpr { + font-family: unset; +} + +.sig.c .k, .sig.c .kt, +.sig.cpp .k, .sig.cpp .kt { + color: #0033B3; +} + +.sig.c .m, +.sig.cpp .m { + color: #1750EB; +} + +.sig.c .s, .sig.c .sc, +.sig.cpp .s, .sig.cpp .sc { + color: #067D17; +} + + +/* -- other body styles ----------------------------------------------------- */ + +ol.arabic { + list-style: decimal; +} + +ol.loweralpha { + list-style: lower-alpha; +} + +ol.upperalpha { + list-style: upper-alpha; +} + +ol.lowerroman { + list-style: lower-roman; +} + +ol.upperroman { + list-style: upper-roman; +} + +:not(li) > ol > li:first-child > :first-child, +:not(li) > ul > li:first-child > :first-child { + margin-top: 0px; +} + +:not(li) > ol > li:last-child > :last-child, +:not(li) > ul > li:last-child > :last-child { + margin-bottom: 0px; +} + +ol.simple ol p, +ol.simple ul p, +ul.simple ol p, +ul.simple ul p { + margin-top: 0; +} + +ol.simple > li:not(:first-child) > p, +ul.simple > li:not(:first-child) > p { + margin-top: 0; +} + +ol.simple p, +ul.simple p { + margin-bottom: 0; +} + 
+aside.footnote > span, +div.citation > span { + float: left; +} +aside.footnote > span:last-of-type, +div.citation > span:last-of-type { + padding-right: 0.5em; +} +aside.footnote > p { + margin-left: 2em; +} +div.citation > p { + margin-left: 4em; +} +aside.footnote > p:last-of-type, +div.citation > p:last-of-type { + margin-bottom: 0em; +} +aside.footnote > p:last-of-type:after, +div.citation > p:last-of-type:after { + content: ""; + clear: both; +} + +dl.field-list { + display: grid; + grid-template-columns: fit-content(30%) auto; +} + +dl.field-list > dt { + font-weight: bold; + word-break: break-word; + padding-left: 0.5em; + padding-right: 5px; +} + +dl.field-list > dd { + padding-left: 0.5em; + margin-top: 0em; + margin-left: 0em; + margin-bottom: 0em; +} + +dl { + margin-bottom: 15px; +} + +dd > :first-child { + margin-top: 0px; +} + +dd ul, dd table { + margin-bottom: 10px; +} + +dd { + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; +} + +.sig dd { + margin-top: 0px; + margin-bottom: 0px; +} + +.sig dl { + margin-top: 0px; + margin-bottom: 0px; +} + +dl > dd:last-child, +dl > dd:last-child > :last-child { + margin-bottom: 0; +} + +dt:target, span.highlighted { + background-color: #fbe54e; +} + +rect.highlighted { + fill: #fbe54e; +} + +dl.glossary dt { + font-weight: bold; + font-size: 1.1em; +} + +.versionmodified { + font-style: italic; +} + +.system-message { + background-color: #fda; + padding: 5px; + border: 3px solid red; +} + +.footnote:target { + background-color: #ffa; +} + +.line-block { + display: block; + margin-top: 1em; + margin-bottom: 1em; +} + +.line-block .line-block { + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; +} + +.guilabel, .menuselection { + font-family: sans-serif; +} + +.accelerator { + text-decoration: underline; +} + +.classifier { + font-style: oblique; +} + +.classifier:before { + font-style: normal; + margin: 0 0.5em; + content: ":"; + display: inline-block; +} + +abbr, acronym { + border-bottom: 
dotted 1px; + cursor: help; +} + +.translated { + background-color: rgba(207, 255, 207, 0.2) +} + +.untranslated { + background-color: rgba(255, 207, 207, 0.2) +} + +/* -- code displays --------------------------------------------------------- */ + +pre { + overflow: auto; + overflow-y: hidden; /* fixes display issues on Chrome browsers */ +} + +pre, div[class*="highlight-"] { + clear: both; +} + +span.pre { + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: none; + white-space: nowrap; +} + +div[class*="highlight-"] { + margin: 1em 0; +} + +td.linenos pre { + border: 0; + background-color: transparent; + color: #aaa; +} + +table.highlighttable { + display: block; +} + +table.highlighttable tbody { + display: block; +} + +table.highlighttable tr { + display: flex; +} + +table.highlighttable td { + margin: 0; + padding: 0; +} + +table.highlighttable td.linenos { + padding-right: 0.5em; +} + +table.highlighttable td.code { + flex: 1; + overflow: hidden; +} + +.highlight .hll { + display: block; +} + +div.highlight pre, +table.highlighttable pre { + margin: 0; +} + +div.code-block-caption + div { + margin-top: 0; +} + +div.code-block-caption { + margin-top: 1em; + padding: 2px 5px; + font-size: small; +} + +div.code-block-caption code { + background-color: transparent; +} + +table.highlighttable td.linenos, +span.linenos, +div.highlight span.gp { /* gp: Generic.Prompt */ + user-select: none; + -webkit-user-select: text; /* Safari fallback only */ + -webkit-user-select: none; /* Chrome/Safari */ + -moz-user-select: none; /* Firefox */ + -ms-user-select: none; /* IE10+ */ +} + +div.code-block-caption span.caption-number { + padding: 0.1em 0.3em; + font-style: italic; +} + +div.code-block-caption span.caption-text { +} + +div.literal-block-wrapper { + margin: 1em 0; +} + +code.xref, a code { + background-color: transparent; + font-weight: bold; +} + +h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { + background-color: transparent; +} + 
+.viewcode-link { + float: right; +} + +.viewcode-back { + float: right; + font-family: sans-serif; +} + +div.viewcode-block:target { + margin: -1px -10px; + padding: 0 10px; +} + +/* -- math display ---------------------------------------------------------- */ + +img.math { + vertical-align: middle; +} + +div.body div.math p { + text-align: center; +} + +span.eqno { + float: right; +} + +span.eqno a.headerlink { + position: absolute; + z-index: 1; +} + +div.math:hover a.headerlink { + visibility: visible; +} + +/* -- printout stylesheet --------------------------------------------------- */ + +@media print { + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link { + display: none; + } +} \ No newline at end of file diff --git a/_static/custom.css b/_static/custom.css new file mode 100644 index 000000000..5e48835fc --- /dev/null +++ b/_static/custom.css @@ -0,0 +1,139 @@ +/* + * Customization of Alabaster theme + * per https://alabaster.readthedocs.io/en/latest/customization.html#custom-stylesheet + */ + +/* "Quick Search" should be capitalized. */ +div#searchbox h3 { + text-transform: capitalize; +} + +/* Much-improved spacing around code blocks. 
*/ +div.highlight pre { + padding: 1ex; +} + +/* Reduce space between paragraphs for better visual structure */ +p { + margin: 1ex 0; +} + +/* Hide "view source code" links by default, only show on hover */ +dt .viewcode-link { + visibility: hidden; + font-size: 70%; +} + +dt:hover .viewcode-link { + visibility: visible; +} + +/* More breathing space between successive methods */ +dl { + margin-bottom: 1.5em; +} + +dl.field-list > dt { + /* Cleaner aligning of Parameters/Returns/Raises listing with method description paragraphs */ + padding-left: 0; + /* Make Parameters/Returns/Raises labels less dominant */ + text-transform: uppercase; + font-size: 70%; +} + +.sidebar-meta { + font-size: 80%; +} + +div.body h1, div.body h2, div.body h3, div.body h4, div.body h5, div.body h6 { + margin: 1.5em 0 0.5em 0; +} + +div.body h1 { + margin: 0 0 0.5em 0; +} + +.toctree-l1 { + padding: 0.1em 0.5em; + margin-left: -0.5em; +} + +div.sphinxsidebar .toctree-l1 a { + border: none; +} + +.toctree-l1.current { + background-color: #f3f5f7; + border-right: 0.5rem solid #a2cedb; +} + + +div.admonition, +div.versionadded, +.py div.versionchanged, +.py div.deprecated { + padding: 0.5em 1em; + border-style: solid; + border-width: 0 0 0 0.5rem; + border-color: #cccccc; + background-color: #f3f5f7; +} + +div.admonition :first-child, +div.versionadded :first-child, +.py div.versionchanged :first-child, +.py div.deprecated :first-child { + margin-top: 0; +} + + +div.admonition :last-child, +div.versionadded :last-child, +.py div.versionchanged :last-child, +.py div.deprecated :last-child { + margin-bottom: 0; +} + +div.admonition p.admonition-title { + font-size: 80%; + text-transform: uppercase; + font-weight: bold; +} + +div.admonition.note, +div.admonition.tip, +div.admonition.seealso, +div.admonition.hint, +div.versionadded, +.py div.versionchanged { + border-left-color: #42b983; +} + +div.admonition.warning, +div.admonition.attention, +div.admonition.caution, +div.admonition.danger, 
+div.admonition.error, +div.admonition.important, +.py div.deprecated { + border-left-color: #b9425e; +} + + +pre { + background-color: #e2f0f4; +} + +.highlight-default, .highlight-python, .highlight-pycon, .highlight-shell , .highlight-text { + border-right: 0.5rem solid #a2cedb; +} + +.highlight span.linenos { + color: #888; + font-size: 75%; + padding: 0 1ex; +} + +nav.contents.local { + border: none; +} diff --git a/_static/doctools.js b/_static/doctools.js new file mode 100644 index 000000000..0398ebb9f --- /dev/null +++ b/_static/doctools.js @@ -0,0 +1,149 @@ +/* + * Base JavaScript utilities for all Sphinx HTML documentation. + */ +"use strict"; + +const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([ + "TEXTAREA", + "INPUT", + "SELECT", + "BUTTON", +]); + +const _ready = (callback) => { + if (document.readyState !== "loading") { + callback(); + } else { + document.addEventListener("DOMContentLoaded", callback); + } +}; + +/** + * Small JavaScript module for the documentation. + */ +const Documentation = { + init: () => { + Documentation.initDomainIndexTable(); + Documentation.initOnKeyListeners(); + }, + + /** + * i18n support + */ + TRANSLATIONS: {}, + PLURAL_EXPR: (n) => (n === 1 ? 0 : 1), + LOCALE: "unknown", + + // gettext and ngettext don't access this so that the functions + // can safely bound to a different name (_ = Documentation.gettext) + gettext: (string) => { + const translated = Documentation.TRANSLATIONS[string]; + switch (typeof translated) { + case "undefined": + return string; // no translation + case "string": + return translated; // translation exists + default: + return translated[0]; // (singular, plural) translation tuple exists + } + }, + + ngettext: (singular, plural, n) => { + const translated = Documentation.TRANSLATIONS[singular]; + if (typeof translated !== "undefined") + return translated[Documentation.PLURAL_EXPR(n)]; + return n === 1 ? 
singular : plural; + }, + + addTranslations: (catalog) => { + Object.assign(Documentation.TRANSLATIONS, catalog.messages); + Documentation.PLURAL_EXPR = new Function( + "n", + `return (${catalog.plural_expr})` + ); + Documentation.LOCALE = catalog.locale; + }, + + /** + * helper function to focus on search bar + */ + focusSearchBar: () => { + document.querySelectorAll("input[name=q]")[0]?.focus(); + }, + + /** + * Initialise the domain index toggle buttons + */ + initDomainIndexTable: () => { + const toggler = (el) => { + const idNumber = el.id.substr(7); + const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`); + if (el.src.substr(-9) === "minus.png") { + el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`; + toggledRows.forEach((el) => (el.style.display = "none")); + } else { + el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`; + toggledRows.forEach((el) => (el.style.display = "")); + } + }; + + const togglerElements = document.querySelectorAll("img.toggler"); + togglerElements.forEach((el) => + el.addEventListener("click", (event) => toggler(event.currentTarget)) + ); + togglerElements.forEach((el) => (el.style.display = "")); + if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler); + }, + + initOnKeyListeners: () => { + // only install a listener if it is really needed + if ( + !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS && + !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS + ) + return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.altKey || event.ctrlKey || event.metaKey) return; + + if (!event.shiftKey) { + switch (event.key) { + case "ArrowLeft": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const prevLink = document.querySelector('link[rel="prev"]'); + if (prevLink && prevLink.href) { + window.location.href = prevLink.href; + 
event.preventDefault(); + } + break; + case "ArrowRight": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const nextLink = document.querySelector('link[rel="next"]'); + if (nextLink && nextLink.href) { + window.location.href = nextLink.href; + event.preventDefault(); + } + break; + } + } + + // some keyboard layouts may need Shift to get / + switch (event.key) { + case "/": + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break; + Documentation.focusSearchBar(); + event.preventDefault(); + } + }); + }, +}; + +// quick alias for translations +const _ = Documentation.gettext; + +_ready(Documentation.init); diff --git a/_static/documentation_options.js b/_static/documentation_options.js new file mode 100644 index 000000000..b856a726b --- /dev/null +++ b/_static/documentation_options.js @@ -0,0 +1,13 @@ +const DOCUMENTATION_OPTIONS = { + VERSION: '0.36.0', + LANGUAGE: 'en', + COLLAPSE_INDEX: false, + BUILDER: 'html', + FILE_SUFFIX: '.html', + LINK_SUFFIX: '.html', + HAS_SOURCE: true, + SOURCELINK_SUFFIX: '.txt', + NAVIGATION_WITH_KEYS: false, + SHOW_SEARCH_SUMMARY: true, + ENABLE_SEARCH_SHORTCUTS: true, +}; \ No newline at end of file diff --git a/_static/file.png b/_static/file.png new file mode 100644 index 000000000..a858a410e Binary files /dev/null and b/_static/file.png differ diff --git a/_static/github-banner.svg b/_static/github-banner.svg new file mode 100644 index 000000000..c47d9dc0c --- /dev/null +++ b/_static/github-banner.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/_static/images/basics/evi-composite.png b/_static/images/basics/evi-composite.png new file mode 100644 index 000000000..5680bf03e Binary files /dev/null and b/_static/images/basics/evi-composite.png differ diff --git a/_static/images/basics/evi-masked-composite.png b/_static/images/basics/evi-masked-composite.png new file mode 100644 index 000000000..82e5014f6 Binary files /dev/null and b/_static/images/basics/evi-masked-composite.png differ diff --git 
a/_static/images/basics/evi-timeseries.png b/_static/images/basics/evi-timeseries.png new file mode 100644 index 000000000..106d9f0f6 Binary files /dev/null and b/_static/images/basics/evi-timeseries.png differ diff --git a/_static/images/batchjobs-jupyter-created.png b/_static/images/batchjobs-jupyter-created.png new file mode 100644 index 000000000..8dd25f34c Binary files /dev/null and b/_static/images/batchjobs-jupyter-created.png differ diff --git a/_static/images/batchjobs-jupyter-listing.png b/_static/images/batchjobs-jupyter-listing.png new file mode 100644 index 000000000..6e94d16b1 Binary files /dev/null and b/_static/images/batchjobs-jupyter-listing.png differ diff --git a/_static/images/batchjobs-jupyter-logs.png b/_static/images/batchjobs-jupyter-logs.png new file mode 100644 index 000000000..e9e286e3e Binary files /dev/null and b/_static/images/batchjobs-jupyter-logs.png differ diff --git a/_static/images/batchjobs-webeditor-listing.png b/_static/images/batchjobs-webeditor-listing.png new file mode 100644 index 000000000..4462f6d42 Binary files /dev/null and b/_static/images/batchjobs-webeditor-listing.png differ diff --git a/_static/images/local/local_ndvi.jpg b/_static/images/local/local_ndvi.jpg new file mode 100644 index 000000000..75c523dcc Binary files /dev/null and b/_static/images/local/local_ndvi.jpg differ diff --git a/_static/images/udf/apply-rescaled-histogram.png b/_static/images/udf/apply-rescaled-histogram.png new file mode 100644 index 000000000..07d97647d Binary files /dev/null and b/_static/images/udf/apply-rescaled-histogram.png differ diff --git a/_static/images/udf/logging_arrayshape.png b/_static/images/udf/logging_arrayshape.png new file mode 100644 index 000000000..c8b8535ef Binary files /dev/null and b/_static/images/udf/logging_arrayshape.png differ diff --git a/_static/images/vito-logo.png b/_static/images/vito-logo.png new file mode 100644 index 000000000..64974f447 Binary files /dev/null and b/_static/images/vito-logo.png 
differ diff --git a/_static/images/welcome.png b/_static/images/welcome.png new file mode 100644 index 000000000..84951d060 Binary files /dev/null and b/_static/images/welcome.png differ diff --git a/_static/language_data.js b/_static/language_data.js new file mode 100644 index 000000000..c7fe6c6fa --- /dev/null +++ b/_static/language_data.js @@ -0,0 +1,192 @@ +/* + * This script contains the language-specific data used by searchtools.js, + * namely the list of stopwords, stemmer, scorer and splitter. + */ + +var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]; + + +/* Non-minified version is copied as a separate JS file, if available */ + +/** + * Porter Stemmer + */ +var Stemmer = function() { + + var step2list = { + ational: 'ate', + tional: 'tion', + enci: 'ence', + anci: 'ance', + izer: 'ize', + bli: 'ble', + alli: 'al', + entli: 'ent', + eli: 'e', + ousli: 'ous', + ization: 'ize', + ation: 'ate', + ator: 'ate', + alism: 'al', + iveness: 'ive', + fulness: 'ful', + ousness: 'ous', + aliti: 'al', + iviti: 'ive', + biliti: 'ble', + logi: 'log' + }; + + var step3list = { + icate: 'ic', + ative: '', + alize: 'al', + iciti: 'ic', + ical: 'ic', + ful: '', + ness: '' + }; + + var c = "[^aeiou]"; // consonant + var v = "[aeiouy]"; // vowel + var C = c + "[^aeiouy]*"; // consonant sequence + var V = v + "[aeiou]*"; // vowel sequence + + var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 + var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 + var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 + var s_v = "^(" + C + ")?" 
+ v; // vowel in stem + + this.stemWord = function (w) { + var stem; + var suffix; + var firstch; + var origword = w; + + if (w.length < 3) + return w; + + var re; + var re2; + var re3; + var re4; + + firstch = w.substr(0,1); + if (firstch == "y") + w = firstch.toUpperCase() + w.substr(1); + + // Step 1a + re = /^(.+?)(ss|i)es$/; + re2 = /^(.+?)([^s])s$/; + + if (re.test(w)) + w = w.replace(re,"$1$2"); + else if (re2.test(w)) + w = w.replace(re2,"$1$2"); + + // Step 1b + re = /^(.+?)eed$/; + re2 = /^(.+?)(ed|ing)$/; + if (re.test(w)) { + var fp = re.exec(w); + re = new RegExp(mgr0); + if (re.test(fp[1])) { + re = /.$/; + w = w.replace(re,""); + } + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = new RegExp(s_v); + if (re2.test(stem)) { + w = stem; + re2 = /(at|bl|iz)$/; + re3 = new RegExp("([^aeiouylsz])\\1$"); + re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re2.test(w)) + w = w + "e"; + else if (re3.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + else if (re4.test(w)) + w = w + "e"; + } + } + + // Step 1c + re = /^(.+?)y$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(s_v); + if (re.test(stem)) + w = stem + "i"; + } + + // Step 2 + re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step2list[suffix]; + } + + // Step 3 + re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step3list[suffix]; + } + + // Step 4 + re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + re2 = /^(.+?)(s|t)(ion)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + if 
(re.test(stem)) + w = stem; + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = new RegExp(mgr1); + if (re2.test(stem)) + w = stem; + } + + // Step 5 + re = /^(.+?)e$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + re2 = new RegExp(meq1); + re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) + w = stem; + } + re = /ll$/; + re2 = new RegExp(mgr1); + if (re.test(w) && re2.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + + // and turn initial Y back to y + if (firstch == "y") + w = firstch.toLowerCase() + w.substr(1); + return w; + } +} + diff --git a/_static/minus.png b/_static/minus.png new file mode 100644 index 000000000..d96755fda Binary files /dev/null and b/_static/minus.png differ diff --git a/_static/plus.png b/_static/plus.png new file mode 100644 index 000000000..7107cec93 Binary files /dev/null and b/_static/plus.png differ diff --git a/_static/pygments.css b/_static/pygments.css new file mode 100644 index 000000000..0d49244ed --- /dev/null +++ b/_static/pygments.css @@ -0,0 +1,75 @@ +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +.highlight .hll { background-color: #ffffcc } +.highlight { background: #eeffcc; } +.highlight .c { color: #408090; font-style: italic } /* Comment */ +.highlight .err { border: 1px solid #FF0000 } /* Error */ +.highlight .k { color: #007020; font-weight: bold } /* Keyword */ +.highlight .o { color: #666666 } /* Operator */ +.highlight .ch { color: #408090; font-style: italic } /* 
Comment.Hashbang */ +.highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #007020 } /* Comment.Preproc */ +.highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */ +.highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */ +.highlight .gd { color: #A00000 } /* Generic.Deleted */ +.highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +.highlight .gr { color: #FF0000 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #00A000 } /* Generic.Inserted */ +.highlight .go { color: #333333 } /* Generic.Output */ +.highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #0044DD } /* Generic.Traceback */ +.highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #007020 } /* Keyword.Pseudo */ +.highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #902000 } /* Keyword.Type */ +.highlight .m { color: #208050 } /* Literal.Number */ +.highlight .s { color: #4070a0 } /* Literal.String */ +.highlight .na { color: #4070a0 } /* Name.Attribute */ +.highlight .nb { color: #007020 } /* Name.Builtin */ +.highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */ +.highlight .no { color: #60add5 } /* Name.Constant */ +.highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */ +.highlight .ni { color: #d55537; 
font-weight: bold } /* Name.Entity */ +.highlight .ne { color: #007020 } /* Name.Exception */ +.highlight .nf { color: #06287e } /* Name.Function */ +.highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ +.highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */ +.highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #bb60d5 } /* Name.Variable */ +.highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ +.highlight .mb { color: #208050 } /* Literal.Number.Bin */ +.highlight .mf { color: #208050 } /* Literal.Number.Float */ +.highlight .mh { color: #208050 } /* Literal.Number.Hex */ +.highlight .mi { color: #208050 } /* Literal.Number.Integer */ +.highlight .mo { color: #208050 } /* Literal.Number.Oct */ +.highlight .sa { color: #4070a0 } /* Literal.String.Affix */ +.highlight .sb { color: #4070a0 } /* Literal.String.Backtick */ +.highlight .sc { color: #4070a0 } /* Literal.String.Char */ +.highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */ +.highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #4070a0 } /* Literal.String.Double */ +.highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */ +.highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */ +.highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */ +.highlight .sx { color: #c65d09 } /* Literal.String.Other */ +.highlight .sr { color: #235388 } /* Literal.String.Regex */ +.highlight .s1 { color: #4070a0 } /* Literal.String.Single */ +.highlight .ss { color: #517918 } /* Literal.String.Symbol */ +.highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ +.highlight .fm { color: #06287e } /* Name.Function.Magic */ +.highlight .vc { color: #bb60d5 } /* Name.Variable.Class */ +.highlight .vg { color: #bb60d5 } /* Name.Variable.Global */ +.highlight .vi { 
color: #bb60d5 } /* Name.Variable.Instance */ +.highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */ +.highlight .il { color: #208050 } /* Literal.Number.Integer.Long */ \ No newline at end of file diff --git a/_static/searchtools.js b/_static/searchtools.js new file mode 100644 index 000000000..2c774d17a --- /dev/null +++ b/_static/searchtools.js @@ -0,0 +1,632 @@ +/* + * Sphinx JavaScript utilities for the full-text search. + */ +"use strict"; + +/** + * Simple result scoring code. + */ +if (typeof Scorer === "undefined") { + var Scorer = { + // Implement the following function to further tweak the score for each result + // The function takes a result array [docname, title, anchor, descr, score, filename] + // and returns the new score. + /* + score: result => { + const [docname, title, anchor, descr, score, filename, kind] = result + return score + }, + */ + + // query matches the full name of an object + objNameMatch: 11, + // or matches in the last dotted part of the object name + objPartialMatch: 6, + // Additive scores depending on the priority of the object + objPrio: { + 0: 15, // used to be importantResults + 1: 5, // used to be objectResults + 2: -5, // used to be unimportantResults + }, + // Used when the priority is not in the mapping. + objPrioDefault: 0, + + // query found in title + title: 15, + partialTitle: 7, + // query found in terms + term: 5, + partialTerm: 2, + }; +} + +// Global search result kind enum, used by themes to style search results. 
+class SearchResultKind { + static get index() { return "index"; } + static get object() { return "object"; } + static get text() { return "text"; } + static get title() { return "title"; } +} + +const _removeChildren = (element) => { + while (element && element.lastChild) element.removeChild(element.lastChild); +}; + +/** + * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping + */ +const _escapeRegExp = (string) => + string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string + +const _displayItem = (item, searchTerms, highlightTerms) => { + const docBuilder = DOCUMENTATION_OPTIONS.BUILDER; + const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX; + const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX; + const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY; + const contentRoot = document.documentElement.dataset.content_root; + + const [docName, title, anchor, descr, score, _filename, kind] = item; + + let listItem = document.createElement("li"); + // Add a class representing the item's type: + // can be used by a theme's CSS selector for styling + // See SearchResultKind for the class names. 
+ listItem.classList.add(`kind-${kind}`); + let requestUrl; + let linkUrl; + if (docBuilder === "dirhtml") { + // dirhtml builder + let dirname = docName + "/"; + if (dirname.match(/\/index\/$/)) + dirname = dirname.substring(0, dirname.length - 6); + else if (dirname === "index/") dirname = ""; + requestUrl = contentRoot + dirname; + linkUrl = requestUrl; + } else { + // normal html builders + requestUrl = contentRoot + docName + docFileSuffix; + linkUrl = docName + docLinkSuffix; + } + let linkEl = listItem.appendChild(document.createElement("a")); + linkEl.href = linkUrl + anchor; + linkEl.dataset.score = score; + linkEl.innerHTML = title; + if (descr) { + listItem.appendChild(document.createElement("span")).innerHTML = + " (" + descr + ")"; + // highlight search terms in the description + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + } + else if (showSearchSummary) + fetch(requestUrl) + .then((responseData) => responseData.text()) + .then((data) => { + if (data) + listItem.appendChild( + Search.makeSearchSummary(data, searchTerms, anchor) + ); + // highlight search terms in the summary + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + }); + Search.output.appendChild(listItem); +}; +const _finishSearch = (resultCount) => { + Search.stopPulse(); + Search.title.innerText = _("Search Results"); + if (!resultCount) + Search.status.innerText = Documentation.gettext( + "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories." 
+ ); + else + Search.status.innerText = Documentation.ngettext( + "Search finished, found one page matching the search query.", + "Search finished, found ${resultCount} pages matching the search query.", + resultCount, + ).replace('${resultCount}', resultCount); +}; +const _displayNextItem = ( + results, + resultCount, + searchTerms, + highlightTerms, +) => { + // results left, load the summary and display it + // this is intended to be dynamic (don't sub resultsCount) + if (results.length) { + _displayItem(results.pop(), searchTerms, highlightTerms); + setTimeout( + () => _displayNextItem(results, resultCount, searchTerms, highlightTerms), + 5 + ); + } + // search finished, update title and status message + else _finishSearch(resultCount); +}; +// Helper function used by query() to order search results. +// Each input is an array of [docname, title, anchor, descr, score, filename, kind]. +// Order the results by score (in opposite order of appearance, since the +// `_displayNextItem` function uses pop() to retrieve items) and then alphabetically. +const _orderResultsByScoreThenName = (a, b) => { + const leftScore = a[4]; + const rightScore = b[4]; + if (leftScore === rightScore) { + // same score: sort alphabetically + const leftTitle = a[1].toLowerCase(); + const rightTitle = b[1].toLowerCase(); + if (leftTitle === rightTitle) return 0; + return leftTitle > rightTitle ? -1 : 1; // inverted is intentional + } + return leftScore > rightScore ? 1 : -1; +}; + +/** + * Default splitQuery function. Can be overridden in ``sphinx.search`` with a + * custom function per language. + * + * The regular expression works by splitting the string on consecutive characters + * that are not Unicode letters, numbers, underscores, or emoji characters. + * This is the same as ``\W+`` in Python, preserving the surrogate pair area. 
+ */ +if (typeof splitQuery === "undefined") { + var splitQuery = (query) => query + .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu) + .filter(term => term) // remove remaining empty strings +} + +/** + * Search Module + */ +const Search = { + _index: null, + _queued_query: null, + _pulse_status: -1, + + htmlToText: (htmlString, anchor) => { + const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); + for (const removalQuery of [".headerlink", "script", "style"]) { + htmlElement.querySelectorAll(removalQuery).forEach((el) => { el.remove() }); + } + if (anchor) { + const anchorContent = htmlElement.querySelector(`[role="main"] ${anchor}`); + if (anchorContent) return anchorContent.textContent; + + console.warn( + `Anchored content block not found. Sphinx search tries to obtain it via DOM query '[role=main] ${anchor}'. Check your theme or template.` + ); + } + + // if anchor not specified or not found, fall back to main content + const docContent = htmlElement.querySelector('[role="main"]'); + if (docContent) return docContent.textContent; + + console.warn( + "Content block not found. Sphinx search tries to obtain it via DOM query '[role=main]'. Check your theme or template." 
+ ); + return ""; + }, + + init: () => { + const query = new URLSearchParams(window.location.search).get("q"); + document + .querySelectorAll('input[name="q"]') + .forEach((el) => (el.value = query)); + if (query) Search.performSearch(query); + }, + + loadIndex: (url) => + (document.body.appendChild(document.createElement("script")).src = url), + + setIndex: (index) => { + Search._index = index; + if (Search._queued_query !== null) { + const query = Search._queued_query; + Search._queued_query = null; + Search.query(query); + } + }, + + hasIndex: () => Search._index !== null, + + deferQuery: (query) => (Search._queued_query = query), + + stopPulse: () => (Search._pulse_status = -1), + + startPulse: () => { + if (Search._pulse_status >= 0) return; + + const pulse = () => { + Search._pulse_status = (Search._pulse_status + 1) % 4; + Search.dots.innerText = ".".repeat(Search._pulse_status); + if (Search._pulse_status >= 0) window.setTimeout(pulse, 500); + }; + pulse(); + }, + + /** + * perform a search for something (or wait until index is loaded) + */ + performSearch: (query) => { + // create the required interface elements + const searchText = document.createElement("h2"); + searchText.textContent = _("Searching"); + const searchSummary = document.createElement("p"); + searchSummary.classList.add("search-summary"); + searchSummary.innerText = ""; + const searchList = document.createElement("ul"); + searchList.setAttribute("role", "list"); + searchList.classList.add("search"); + + const out = document.getElementById("search-results"); + Search.title = out.appendChild(searchText); + Search.dots = Search.title.appendChild(document.createElement("span")); + Search.status = out.appendChild(searchSummary); + Search.output = out.appendChild(searchList); + + const searchProgress = document.getElementById("search-progress"); + // Some themes don't use the search progress node + if (searchProgress) { + searchProgress.innerText = _("Preparing search..."); + } + 
Search.startPulse(); + + // index already loaded, the browser was quick! + if (Search.hasIndex()) Search.query(query); + else Search.deferQuery(query); + }, + + _parseQuery: (query) => { + // stem the search terms and add them to the correct list + const stemmer = new Stemmer(); + const searchTerms = new Set(); + const excludedTerms = new Set(); + const highlightTerms = new Set(); + const objectTerms = new Set(splitQuery(query.toLowerCase().trim())); + splitQuery(query.trim()).forEach((queryTerm) => { + const queryTermLower = queryTerm.toLowerCase(); + + // maybe skip this "word" + // stopwords array is from language_data.js + if ( + stopwords.indexOf(queryTermLower) !== -1 || + queryTerm.match(/^\d+$/) + ) + return; + + // stem the word + let word = stemmer.stemWord(queryTermLower); + // select the correct list + if (word[0] === "-") excludedTerms.add(word.substr(1)); + else { + searchTerms.add(word); + highlightTerms.add(queryTermLower); + } + }); + + if (SPHINX_HIGHLIGHT_ENABLED) { // set in sphinx_highlight.js + localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" ")) + } + + // console.debug("SEARCH: searching for:"); + // console.info("required: ", [...searchTerms]); + // console.info("excluded: ", [...excludedTerms]); + + return [query, searchTerms, excludedTerms, highlightTerms, objectTerms]; + }, + + /** + * execute search (requires search index to be loaded) + */ + _performSearch: (query, searchTerms, excludedTerms, highlightTerms, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + const allTitles = Search._index.alltitles; + const indexEntries = Search._index.indexentries; + + // Collect multiple result groups to be sorted separately and then ordered. + // Each is an array of [docname, title, anchor, descr, score, filename, kind]. 
+ const normalResults = []; + const nonMainIndexResults = []; + + _removeChildren(document.getElementById("search-progress")); + + const queryLower = query.toLowerCase().trim(); + for (const [title, foundTitles] of Object.entries(allTitles)) { + if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) { + for (const [file, id] of foundTitles) { + const score = Math.round(Scorer.title * queryLower.length / title.length); + const boost = titles[file] === title ? 1 : 0; // add a boost for document titles + normalResults.push([ + docNames[file], + titles[file] !== title ? `${titles[file]} > ${title}` : title, + id !== null ? "#" + id : "", + null, + score + boost, + filenames[file], + SearchResultKind.title, + ]); + } + } + } + + // search for explicit entries in index directives + for (const [entry, foundEntries] of Object.entries(indexEntries)) { + if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) { + for (const [file, id, isMain] of foundEntries) { + const score = Math.round(100 * queryLower.length / entry.length); + const result = [ + docNames[file], + titles[file], + id ? "#" + id : "", + null, + score, + filenames[file], + SearchResultKind.index, + ]; + if (isMain) { + normalResults.push(result); + } else { + nonMainIndexResults.push(result); + } + } + } + } + + // lookup as object + objectTerms.forEach((term) => + normalResults.push(...Search.performObjectSearch(term, objectTerms)) + ); + + // lookup as search terms in fulltext + normalResults.push(...Search.performTermsSearch(searchTerms, excludedTerms)); + + // let the scorer override scores with a custom scoring function + if (Scorer.score) { + normalResults.forEach((item) => (item[4] = Scorer.score(item))); + nonMainIndexResults.forEach((item) => (item[4] = Scorer.score(item))); + } + + // Sort each group of results by score and then alphabetically by name. 
+ normalResults.sort(_orderResultsByScoreThenName); + nonMainIndexResults.sort(_orderResultsByScoreThenName); + + // Combine the result groups in (reverse) order. + // Non-main index entries are typically arbitrary cross-references, + // so display them after other results. + let results = [...nonMainIndexResults, ...normalResults]; + + // remove duplicate search results + // note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept + let seen = new Set(); + results = results.reverse().reduce((acc, result) => { + let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(','); + if (!seen.has(resultStr)) { + acc.push(result); + seen.add(resultStr); + } + return acc; + }, []); + + return results.reverse(); + }, + + query: (query) => { + const [searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms] = Search._parseQuery(query); + const results = Search._performSearch(searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms); + + // for debugging + //Search.lastresults = results.slice(); // a copy + // console.info("search results:", Search.lastresults); + + // print the results + _displayNextItem(results, results.length, searchTerms, highlightTerms); + }, + + /** + * search for object names + */ + performObjectSearch: (object, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const objects = Search._index.objects; + const objNames = Search._index.objnames; + const titles = Search._index.titles; + + const results = []; + + const objectSearchCallback = (prefix, match) => { + const name = match[4] + const fullname = (prefix ? prefix + "." : "") + name; + const fullnameLower = fullname.toLowerCase(); + if (fullnameLower.indexOf(object) < 0) return; + + let score = 0; + const parts = fullnameLower.split("."); + + // check for different match types: exact matches of full name or + // "last name" (i.e. 
last dotted part) + if (fullnameLower === object || parts.slice(-1)[0] === object) + score += Scorer.objNameMatch; + else if (parts.slice(-1)[0].indexOf(object) > -1) + score += Scorer.objPartialMatch; // matches in last name + + const objName = objNames[match[1]][2]; + const title = titles[match[0]]; + + // If more than one term searched for, we require other words to be + // found in the name/title/description + const otherTerms = new Set(objectTerms); + otherTerms.delete(object); + if (otherTerms.size > 0) { + const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase(); + if ( + [...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0) + ) + return; + } + + let anchor = match[3]; + if (anchor === "") anchor = fullname; + else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname; + + const descr = objName + _(", in ") + title; + + // add custom score for some objects according to scorer + if (Scorer.objPrio.hasOwnProperty(match[2])) + score += Scorer.objPrio[match[2]]; + else score += Scorer.objPrioDefault; + + results.push([ + docNames[match[0]], + fullname, + "#" + anchor, + descr, + score, + filenames[match[0]], + SearchResultKind.object, + ]); + }; + Object.keys(objects).forEach((prefix) => + objects[prefix].forEach((array) => + objectSearchCallback(prefix, array) + ) + ); + return results; + }, + + /** + * search for full-text terms in the index + */ + performTermsSearch: (searchTerms, excludedTerms) => { + // prepare search + const terms = Search._index.terms; + const titleTerms = Search._index.titleterms; + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + + const scoreMap = new Map(); + const fileMap = new Map(); + + // perform the search on the required terms + searchTerms.forEach((word) => { + const files = []; + const arr = [ + { files: terms[word], score: Scorer.term }, + { files: titleTerms[word], score: Scorer.title }, + ]; + // add 
support for partial matches + if (word.length > 2) { + const escapedWord = _escapeRegExp(word); + if (!terms.hasOwnProperty(word)) { + Object.keys(terms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: terms[term], score: Scorer.partialTerm }); + }); + } + if (!titleTerms.hasOwnProperty(word)) { + Object.keys(titleTerms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: titleTerms[term], score: Scorer.partialTitle }); + }); + } + } + + // no match but word was a required one + if (arr.every((record) => record.files === undefined)) return; + + // found search word in contents + arr.forEach((record) => { + if (record.files === undefined) return; + + let recordFiles = record.files; + if (recordFiles.length === undefined) recordFiles = [recordFiles]; + files.push(...recordFiles); + + // set score for the word in each file + recordFiles.forEach((file) => { + if (!scoreMap.has(file)) scoreMap.set(file, {}); + scoreMap.get(file)[word] = record.score; + }); + }); + + // create the mapping + files.forEach((file) => { + if (!fileMap.has(file)) fileMap.set(file, [word]); + else if (fileMap.get(file).indexOf(word) === -1) fileMap.get(file).push(word); + }); + }); + + // now check if the files don't contain excluded terms + const results = []; + for (const [file, wordList] of fileMap) { + // check if all requirements are matched + + // as search terms with length < 3 are discarded + const filteredTermCount = [...searchTerms].filter( + (term) => term.length > 2 + ).length; + if ( + wordList.length !== searchTerms.size && + wordList.length !== filteredTermCount + ) + continue; + + // ensure that none of the excluded terms is in the search result + if ( + [...excludedTerms].some( + (term) => + terms[term] === file || + titleTerms[term] === file || + (terms[term] || []).includes(file) || + (titleTerms[term] || []).includes(file) + ) + ) + break; + + // select one (max) score for the file. 
+ const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w])); + // add result to the result list + results.push([ + docNames[file], + titles[file], + "", + null, + score, + filenames[file], + SearchResultKind.text, + ]); + } + return results; + }, + + /** + * helper function to return a node containing the + * search summary for a given text. keywords is a list + * of stemmed words. + */ + makeSearchSummary: (htmlText, keywords, anchor) => { + const text = Search.htmlToText(htmlText, anchor); + if (text === "") return null; + + const textLower = text.toLowerCase(); + const actualStartPosition = [...keywords] + .map((k) => textLower.indexOf(k.toLowerCase())) + .filter((i) => i > -1) + .slice(-1)[0]; + const startWithContext = Math.max(actualStartPosition - 120, 0); + + const top = startWithContext === 0 ? "" : "..."; + const tail = startWithContext + 240 < text.length ? "..." : ""; + + let summary = document.createElement("p"); + summary.classList.add("context"); + summary.textContent = top + text.substr(startWithContext, 240).trim() + tail; + + return summary; + }, +}; + +_ready(Search.init); diff --git a/_static/sphinx_highlight.js b/_static/sphinx_highlight.js new file mode 100644 index 000000000..8a96c69a1 --- /dev/null +++ b/_static/sphinx_highlight.js @@ -0,0 +1,154 @@ +/* Highlighting utilities for Sphinx HTML documentation. */ +"use strict"; + +const SPHINX_HIGHLIGHT_ENABLED = true + +/** + * highlight a given string on a node by wrapping it in + * span elements with the given class name. 
+ */ +const _highlight = (node, addItems, text, className) => { + if (node.nodeType === Node.TEXT_NODE) { + const val = node.nodeValue; + const parent = node.parentNode; + const pos = val.toLowerCase().indexOf(text); + if ( + pos >= 0 && + !parent.classList.contains(className) && + !parent.classList.contains("nohighlight") + ) { + let span; + + const closestNode = parent.closest("body, svg, foreignObject"); + const isInSVG = closestNode && closestNode.matches("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.classList.add(className); + } + + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + const rest = document.createTextNode(val.substr(pos + text.length)); + parent.insertBefore( + span, + parent.insertBefore( + rest, + node.nextSibling + ) + ); + node.nodeValue = val.substr(0, pos); + /* There may be more occurrences of search term in this node. So call this + * function recursively on the remaining fragment. + */ + _highlight(rest, addItems, text, className); + + if (isInSVG) { + const rect = document.createElementNS( + "http://www.w3.org/2000/svg", + "rect" + ); + const bbox = parent.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute("class", className); + addItems.push({ parent: parent, target: rect }); + } + } + } else if (node.matches && !node.matches("button, select, textarea")) { + node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); + } +}; +const _highlightText = (thisNode, text, className) => { + let addItems = []; + _highlight(thisNode, addItems, text, className); + addItems.forEach((obj) => + obj.parent.insertAdjacentElement("beforebegin", obj.target) + ); +}; + +/** + * Small JavaScript module for the documentation. 
+ */ +const SphinxHighlight = { + + /** + * highlight the search words provided in localstorage in the text + */ + highlightSearchWords: () => { + if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight + + // get and clear terms from localstorage + const url = new URL(window.location); + const highlight = + localStorage.getItem("sphinx_highlight_terms") + || url.searchParams.get("highlight") + || ""; + localStorage.removeItem("sphinx_highlight_terms") + url.searchParams.delete("highlight"); + window.history.replaceState({}, "", url); + + // get individual terms from highlight string + const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); + if (terms.length === 0) return; // nothing to do + + // There should never be more than one element matching "div.body" + const divBody = document.querySelectorAll("div.body"); + const body = divBody.length ? divBody[0] : document.querySelector("body"); + window.setTimeout(() => { + terms.forEach((term) => _highlightText(body, term, "highlighted")); + }, 10); + + const searchBox = document.getElementById("searchbox"); + if (searchBox === null) return; + searchBox.appendChild( + document + .createRange() + .createContextualFragment( + '" + ) + ); + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords: () => { + document + .querySelectorAll("#searchbox .highlight-link") + .forEach((el) => el.remove()); + document + .querySelectorAll("span.highlighted") + .forEach((el) => el.classList.remove("highlighted")); + localStorage.removeItem("sphinx_highlight_terms") + }, + + initEscapeListener: () => { + // only install a listener if it is really needed + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; + 
if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { + SphinxHighlight.hideSearchWords(); + event.preventDefault(); + } + }); + }, +}; + +_ready(() => { + /* Do not call highlightSearchWords() when we are on the search page. + * It will highlight words from the *previous* search query. + */ + if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords(); + SphinxHighlight.initEscapeListener(); +}); diff --git a/api-processbuilder.html b/api-processbuilder.html new file mode 100644 index 000000000..b06f00cbb --- /dev/null +++ b/api-processbuilder.html @@ -0,0 +1,196 @@ + + + + + + + + <no title> — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

The ProcessBuilder class +is a helper class that implements +(much like the openEO process functions) +each openEO process as a method. +On top of that it also adds syntactic sugar to support Python operators as well +(e.g. + is translated to the add process).

+
+

Attention

+

As a normal user, you should never create a +ProcessBuilder instance +directly.

+

You should only interact with this class inside a callback +function/lambda while building a child callback process graph +as discussed at Callback as a callable.

+
+

For example, let’s start from this simple usage snippet +where we want to reduce the temporal dimension +by taking the temporal mean of each timeseries:

+
def my_reducer(data):
+    return data.mean()
+
+cube.reduce_dimension(reducer=my_reducer, dimension="t")
+
+
+

Note that this my_reducer function has a data argument, +which conceptually corresponds to an array of pixel values +(along the temporal dimension). +However, it’s important to understand that the my_reducer function +is actually not evaluated when you execute your process graph +on an openEO back-end, e.g. as a batch job. +Instead, my_reducer is evaluated +while building your process graph client-side +(at the time you execute that cube.reduce_dimension() statement to be precise). +This means that the data argument is actually not a concrete array of EO data, +but some kind of virtual placeholder, +a ProcessBuilder instance, +that keeps track of the operations you intend to do on the EO data.

+

To make that more concrete, it helps to add type hints +which will make it easier to discover what you can do with the argument +(depending on which editor or IDE you are using):

+
from openeo.processes import ProcessBuilder
+
+def my_reducer(data: ProcessBuilder) -> ProcessBuilder:
+    return data.mean()
+
+cube.reduce_dimension(reducer=my_reducer, dimension="t")
+
+
+

Because ProcessBuilder methods +return new ProcessBuilder instances, +and because it supports syntactic sugar to use Python operators on it, +and because openeo.processes functions +also accept and return ProcessBuilder instances, +we can mix methods, functions and operators in the callback function like this:

+
from openeo.processes import ProcessBuilder, cos
+
+def my_reducer(data: ProcessBuilder) -> ProcessBuilder:
+    return cos(data.mean()) + 1.23
+
+cube.reduce_dimension(reducer=my_reducer, dimension="t")
+
+
+

or compactly, using an anonymous lambda expression:

+
from openeo.processes import cos
+
+cube.reduce_dimension(
+    reducer=lambda data: cos(data.mean()) + 1.23,
+    dimension="t"
+)
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/api-processes.html b/api-processes.html new file mode 100644 index 000000000..a3d4e5c7c --- /dev/null +++ b/api-processes.html @@ -0,0 +1,4557 @@ + + + + + + + + API: openeo.processes — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

API: openeo.processes

+

The openeo.processes module contains building blocks and helpers +to construct so-called “child callbacks” for openEO processes like +openeo.rest.datacube.DataCube.apply() and +openeo.rest.datacube.DataCube.reduce_dimension(), +as discussed at Callback as a callable.

+
+

Note

+

The contents of the openeo.processes module are automatically compiled +from the official openEO process specifications. +Developers that want to fix bugs in, or add implementations to this +module should not touch the file directly, but instead address it in the +upstream openeo-processes repository +or in the internal tooling to generate this file.

+
+ +
+

Functions in openeo.processes

+

The openeo.processes module implements (at top-level) +a regular Python function for each openEO process +(not only the official stable ones, but also experimental ones in “proposal” state).

+

These functions can be used directly as child callback, +for example as follows:

+
from openeo.processes import absolute, max
+
+cube.apply(absolute)
+cube.reduce_dimension(max, dimension="t")
+
+
+

Note how the signatures of the parent DataCube methods +and the callback functions match up:

+ +
+
+openeo.processes.absolute(x)[source]
+

Absolute value

+
+
Parameters:
+

x – A number.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed absolute value.

+
+
+
+

See also

+

openeo.org documentation on process “absolute”.

+
+
+ +
+
+openeo.processes.add(x, y)[source]
+

Addition of two numbers

+
+
Parameters:
+
    +
  • x – The first summand.

  • +
  • y – The second summand.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed sum of the two numbers.

+
+
+
+

See also

+

openeo.org documentation on process “add”.

+
+
+ +
+
+openeo.processes.add_dimension(data, name, label, type=<object object>)[source]
+

Add a new dimension

+
+
Parameters:
+
    +
  • data – A data cube to add the dimension to.

  • +
  • name – Name for the dimension.

  • +
  • label – A dimension label.

  • +
  • type – The type of dimension, defaults to other.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The data cube with a newly added dimension. The new dimension has exactly one dimension label. All +other dimensions remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “add_dimension”.

+
+
+ +
+
+openeo.processes.aggregate_spatial(data, geometries, reducer, target_dimension=<object object>, context=<object object>)[source]
+

Zonal statistics for geometries

+
+
Parameters:
+
    +
  • data – A raster data cube with at least two spatial dimensions. The data cube implicitly gets +restricted to the bounds of the geometries as if filter_spatial() would have been used with the same +values for the corresponding parameters immediately before this process.

  • +
  • geometries – Geometries for which the aggregation will be computed. Feature properties are preserved +for vector data cubes and all GeoJSON Features. One value will be computed per label in the dimension of +type geometries, GeoJSON Feature or Geometry. For a FeatureCollection multiple values will be +computed, one value per contained Feature. No values will be computed for empty geometries. For example, +a single value will be computed for a MultiPolygon, but two values will be computed for a +FeatureCollection containing two polygons. - For polygons, the process considers all pixels for +which the point at the pixel center intersects with the corresponding polygon (as defined in the Simple +Features standard by the OGC). - For points, the process considers the closest pixel center. - For +lines (line strings), the process considers all the pixels whose centers are closest to at least one +point on the line. Thus, pixels may be part of multiple geometries and be part of multiple aggregations. +No operation is applied to geometries that are outside of the bounds of the data.

  • +
  • reducer – A reducer to be applied on all values of each geometry. A reducer is a single process such +as mean() or a set of processes, which computes a single value for a list of values, see the category +‘reducer’ for such processes.

  • +
  • target_dimension – By default (which is null), the process only computes the results and doesn’t +add a new dimension. If this parameter contains a new dimension name, the computation also stores +information about the total count of pixels (valid + invalid pixels) and the number of valid pixels (see +is_valid()) for each computed value. These values are added as a new dimension. The new dimension of +type other has the dimension labels value, total_count and valid_count. Fails with a +TargetDimensionExists exception if a dimension with the specified name exists.

  • +
  • context – Additional data to be passed to the reducer.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A vector data cube with the computed results. Empty geometries still exist but without any +aggregated values (i.e. no-data). The spatial dimensions are replaced by a dimension of type ‘geometries’ +and if target_dimension is not null, a new dimension is added.

+
+
+
+

See also

+

openeo.org documentation on process “aggregate_spatial”.

+
+
+ +
+
+openeo.processes.aggregate_spatial_window(data, reducer, size, boundary=<object object>, align=<object object>, context=<object object>)[source]
+

Zonal statistics for rectangular windows

+
+
Parameters:
+
    +
  • data – A raster data cube with exactly two horizontal spatial dimensions and an arbitrary number of +additional dimensions. The process is applied to all additional dimensions individually.

  • +
  • reducer – A reducer to be applied on the list of values, which contain all pixels covered by the +window. A reducer is a single process such as mean() or a set of processes, which computes a single +value for a list of values, see the category ‘reducer’ for such processes.

  • +
  • size – Window size in pixels along the horizontal spatial dimensions. The first value corresponds to +the x axis, the second value corresponds to the y axis.

  • +
  • boundary – Behavior to apply if the number of values for the axes x and y is not a multiple of +the corresponding value in the size parameter. Options are: - pad (default): pad the data cube with +the no-data value null to fit the required window size. - trim: trim the data cube to fit the required +window size. Set the parameter align to specify the corner to which the data is aligned.

  • +
  • align – If the data requires padding or trimming (see parameter boundary), specifies to which +corner of the spatial extent the data is aligned to. For example, if the data is aligned to the upper left, +the process pads/trims at the lower-right.

  • +
  • context – Additional data to be passed to the reducer.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A raster data cube with the newly computed values and the same dimensions. The resolution will +change depending on the chosen values for the size and boundary parameter. It usually decreases for the +dimensions which have the corresponding parameter size set to values greater than 1. The dimension +labels will be set to the coordinate at the center of the window. The other dimension properties (name, +type and reference system) remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “aggregate_spatial_window”.

+
+
+ +
+
+openeo.processes.aggregate_temporal(data, intervals, reducer, labels=<object object>, dimension=<object object>, context=<object object>)[source]
+

Temporal aggregations

+
+
Parameters:
+
    +
  • data – A data cube.

  • +
  • intervals – Left-closed temporal intervals, which are allowed to overlap. Each temporal interval in +the array has exactly two elements: 1. The first element is the start of the temporal interval. The +specified time instant is included in the interval. 2. The second element is the end of the temporal +interval. The specified time instant is excluded from the interval. The second element must always be +greater/later than the first element, except when using time without date. Otherwise, a +TemporalExtentEmpty exception is thrown.

  • +
  • reducer – A reducer to be applied for the values contained in each interval. A reducer is a single +process such as mean() or a set of processes, which computes a single value for a list of values, see +the category ‘reducer’ for such processes. Intervals may not contain any values, which for most reducers +leads to no-data (null) values by default.

  • +
  • labels – Distinct labels for the intervals, which can contain dates and/or times. Is only required to +be specified if the values for the start of the temporal intervals are not distinct and thus the default +labels would not be unique. The number of labels and the number of groups need to be equal.

  • +
  • dimension – The name of the temporal dimension for aggregation. All data along the dimension is +passed through the specified reducer. If the dimension is not set or set to null, the data cube is +expected to only have one temporal dimension. Fails with a TooManyDimensions exception if it has more +dimensions. Fails with a DimensionNotAvailable exception if the specified dimension does not exist.

  • +
  • context – Additional data to be passed to the reducer.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A new data cube with the same dimensions. The dimension properties (name, type, labels, reference +system and resolution) remain unchanged, except for the resolution and dimension labels of the given +temporal dimension.

+
+
+
+

See also

+

openeo.org documentation on process “aggregate_temporal”.

+
+
+ +
+
+openeo.processes.aggregate_temporal_period(data, period, reducer, dimension=<object object>, context=<object object>)[source]
+

Temporal aggregations based on calendar hierarchies

+
+
Parameters:
+
    +
  • data – The source data cube.

  • +
  • period – The time intervals to aggregate. The following pre-defined values are available: * hour: +Hour of the day * day: Day of the year * week: Week of the year * dekad: Ten day periods, counted per +year with three periods per month (day 1 - 10, 11 - 20 and 21 - end of month). The third dekad of the month +can range from 8 to 11 days. For example, the third dekad of a year spans from January 21 till January 31 +(11 days), the fourth dekad spans from February 1 till February 10 (10 days) and the sixth dekad spans from +February 21 till February 28 or February 29 in a leap year (8 or 9 days respectively). * month: Month of +the year * season: Three month periods of the calendar seasons (December - February, March - May, June - +August, September - November). * tropical-season: Six month periods of the tropical seasons (November - +April, May - October). * year: Proleptic years * decade: Ten year periods ([0-to-9 +decade](https://en.wikipedia.org/wiki/Decade#0-to-9_decade)), from a year ending in a 0 to the next year +ending in a 9. * decade-ad: Ten year periods ([1-to-0 +decade](https://en.wikipedia.org/wiki/Decade#1-to-0_decade)) better aligned with the anno Domini (AD) +calendar era, from a year ending in a 1 to the next year ending in a 0.

  • +
  • reducer – A reducer to be applied for the values contained in each period. A reducer is a single +process such as mean() or a set of processes, which computes a single value for a list of values, see +the category ‘reducer’ for such processes. Periods may not contain any values, which for most reducers +leads to no-data (null) values by default.

  • +
  • dimension – The name of the temporal dimension for aggregation. All data along the dimension is +passed through the specified reducer. If the dimension is not set or set to null, the source data cube is +expected to only have one temporal dimension. Fails with a TooManyDimensions exception if it has more +dimensions. Fails with a DimensionNotAvailable exception if the specified dimension does not exist.

  • +
  • context – Additional data to be passed to the reducer.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A new data cube with the same dimensions. The dimension properties (name, type, labels, reference +system and resolution) remain unchanged, except for the resolution and dimension labels of the given +temporal dimension. The specified temporal dimension has the following dimension labels (YYYY = four- +digit year, MM = two-digit month, DD = two-digit day of month): * hour: YYYY-MM-DD-00 - YYYY-MM- +DD-23 * day: YYYY-001 - YYYY-365 * week: YYYY-01 - YYYY-52 * dekad: YYYY-00 - YYYY-36 * +month: YYYY-01 - YYYY-12 * season: YYYY-djf (December - February), YYYY-mam (March - May), +YYYY-jja (June - August), YYYY-son (September - November). * tropical-season: YYYY-ndjfma (November +- April), YYYY-mjjaso (May - October). * year: YYYY * decade: YYY0 * decade-ad: YYY1 The +dimension labels in the new data cube are complete for the whole extent of the source data cube. For +example, if period is set to day and the source data cube has two dimension labels at the beginning of +the year (2020-01-01) and the end of a year (2020-12-31), the process returns a data cube with 365 +dimension labels (2020-001, 2020-002, …, 2020-365). In contrast, if period is set to day and +the source data cube has just one dimension label 2020-01-05, the process returns a data cube with just a +single dimension label (2020-005).

+
+
+
+

See also

+

openeo.org documentation on process “aggregate_temporal_period”.

+
+
+ +
+
+openeo.processes.all(data, ignore_nodata=<object object>)[source]
+

Are all of the values true?

+
+
Parameters:
+
    +
  • data – A set of boolean values.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not and ignores them by default.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Boolean result of the logical operation.

+
+
+
+

See also

+

openeo.org documentation on process “all”.

+
+
+ +
+
+openeo.processes.and_(x, y)[source]
+

Logical AND

+
+
Parameters:
+
    +
  • x – A boolean value.

  • +
  • y – A boolean value.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Boolean result of the logical AND.

+
+
+
+

See also

+

openeo.org documentation on process “and_”.

+
+
+ +
+
+openeo.processes.anomaly(data, normals, period)[source]
+

Compute anomalies

+
+
Parameters:
+
    +
  • data – A data cube with exactly one temporal dimension and the following dimension labels for the +given period (YYYY = four-digit year, MM = two-digit month, DD = two-digit day of month): * hour: +YYYY-MM-DD-00 - YYYY-MM-DD-23 * day: YYYY-001 - YYYY-365 * week: YYYY-01 - YYYY-52 * +dekad: YYYY-00 - YYYY-36 * month: YYYY-01 - YYYY-12 * season: YYYY-djf (December - +February), YYYY-mam (March - May), YYYY-jja (June - August), YYYY-son (September - November). * +tropical-season: YYYY-ndjfma (November - April), YYYY-mjjaso (May - October). * year: YYYY * +decade: YYY0 * decade-ad: YYY1 * single-period / climatology-period: Any +aggregate_temporal_period() can compute such a data cube.

  • +
  • normals – A data cube with normals, e.g. daily, monthly or yearly values computed from a process such +as climatological_normal(). Must contain exactly one temporal dimension with the following dimension +labels for the given period: * hour: 00 - 23 * day: 001 - 365 * week: 01 - 52 * dekad: +00 - 36 * month: 01 - 12 * season: djf (December - February), mam (March - May), jja +(June - August), son (September - November) * tropical-season: ndjfma (November - April), mjjaso +(May - October) * year: Four-digit year numbers * decade: Four-digit year numbers, the last digit being +a 0 * decade-ad: Four-digit year numbers, the last digit being a 1 * single-period / climatology- +period: A single dimension label with any name is expected.

  • +
  • period – Specifies the time intervals available in the normals data cube. The following options are +available: * hour: Hour of the day * day: Day of the year * week: Week of the year * dekad: Ten +day periods, counted per year with three periods per month (day 1 - 10, 11 - 20 and 21 - end of month). The +third dekad of the month can range from 8 to 11 days. For example, the fourth dekad is Feb, 1 - Feb, 10 +each year. * month: Month of the year * season: Three month periods of the calendar seasons (December - +February, March - May, June - August, September - November). * tropical-season: Six month periods of the +tropical seasons (November - April, May - October). * year: Proleptic years * decade: Ten year periods +([0-to-9 decade](https://en.wikipedia.org/wiki/Decade#0-to-9_decade)), from a year ending in a 0 to the +next year ending in a 9. * decade-ad: Ten year periods ([1-to-0 +decade](https://en.wikipedia.org/wiki/Decade#1-to-0_decade)) better aligned with the anno Domini (AD) +calendar era, from a year ending in a 1 to the next year ending in a 0. * single-period / climatology- +period: A single period of arbitrary length

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube with the same dimensions. The dimension properties (name, type, labels, reference +system and resolution) remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “anomaly”.

+
+
+ +
+
+openeo.processes.any(data, ignore_nodata=<object object>)[source]
+

Is at least one value true?

+
+
Parameters:
+
    +
  • data – A set of boolean values.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not and ignores them by default.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Boolean result of the logical operation.

+
+
+
+

See also

+

openeo.org documentation on process “any”.

+
+
+ +
+
+openeo.processes.apply(data, process, context=<object object>)[source]
+

Apply a process to each value

+
+
Parameters:
+
    +
  • data – A data cube.

  • +
  • process – A process that accepts and returns a single value and is applied on each individual value +in the data cube. The process may consist of multiple sub-processes and could, for example, consist of +processes such as absolute() or linear_scale_range().

  • +
  • context – Additional data to be passed to the process.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube with the newly computed values and the same dimensions. The dimension properties +(name, type, labels, reference system and resolution) remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “apply”.

+
+
+ +
+
+openeo.processes.apply_dimension(data, process, dimension, target_dimension=<object object>, context=<object object>)[source]
+

Apply a process to all values along a dimension

+
+
Parameters:
+
    +
  • data – A data cube.

  • +
  • process – Process to be applied on all values along the given dimension. The specified process needs +to accept an array and must return an array with at least one element. A process may consist of multiple +sub-processes.

  • +
  • dimension – The name of the source dimension to apply the process on. Fails with a +DimensionNotAvailable exception if the specified dimension does not exist.

  • +
  • target_dimension – The name of the target dimension or null (the default) to use the source +dimension specified in the parameter dimension. By specifying a target dimension, the source dimension +is removed. The target dimension with the specified name and the type other (see add_dimension()) is +created, if it doesn’t exist yet.

  • +
  • context – Additional data to be passed to the process.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube with the newly computed values. All dimensions stay the same, except for the +dimensions specified in corresponding parameters. There are three cases how the dimensions can change: 1. +The source dimension is the target dimension: - The (number of) dimensions remain unchanged as the +source dimension is the target dimension. - The source dimension properties name and type remain +unchanged. - The dimension labels, the reference system and the resolution are preserved only if the +number of values in the source dimension is equal to the number of values computed by the process. +Otherwise, all other dimension properties change as defined in the list below. 2. The source dimension is +not the target dimension. The target dimension exists with a single label only: - The number of +dimensions decreases by one as the source dimension is ‘dropped’ and the target dimension is filled with +the processed data that originates from the source dimension. - The target dimension properties name and +type remain unchanged. All other dimension properties change as defined in the list below. 3. The source +dimension is not the target dimension and the latter does not exist: - The number of dimensions remain +unchanged, but the source dimension is replaced with the target dimension. - The target dimension has +the specified name and the type other. All other dimension properties are set as defined in the list below. +Unless otherwise stated above, for the given (target) dimension the following applies: - the number of +dimension labels is equal to the number of values computed by the process, - the dimension labels are +incrementing integers starting from zero, - the resolution changes, and - the reference system is +undefined.

+
+
+
+

See also

+

openeo.org documentation on process “apply_dimension”.

+
+
+ +
+
+openeo.processes.apply_kernel(data, kernel, factor=<object object>, border=<object object>, replace_invalid=<object object>)[source]
+

Apply a spatial convolution with a kernel

+
+
Parameters:
+
    +
  • data – A raster data cube.

  • +
  • kernel – Kernel as a two-dimensional array of weights. The inner level of the nested array aligns +with the x axis and the outer level aligns with the y axis. Each level of the kernel must have an +uneven number of elements, otherwise the process throws a KernelDimensionsUneven exception.

  • +
  • factor – A factor that is multiplied to each value after the kernel has been applied. This is +basically a shortcut for explicitly multiplying each value by a factor afterwards, which is often required +for some kernel-based algorithms such as the Gaussian blur.

  • +
  • border – Determines how the data is extended when the kernel overlaps with the borders. Defaults to +fill the border with zeroes. The following options are available: * numeric value - fill with a user- +defined constant number n: nnnnnn|abcdefgh|nnnnnn (default, with n = 0) * replicate - repeat the +value from the pixel at the border: aaaaaa|abcdefgh|hhhhhh * reflect - mirror/reflect from the border: +fedcba|abcdefgh|hgfedc * reflect_pixel - mirror/reflect from the center of the pixel at the border: +gfedcb|abcdefgh|gfedcb * wrap - repeat/wrap the image: cdefgh|abcdefgh|abcdef

  • +
  • replace_invalid – This parameter specifies the value to replace non-numerical or infinite numerical +values with. By default, those values are replaced with zeroes.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube with the newly computed values and the same dimensions. The dimension properties +(name, type, labels, reference system and resolution) remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “apply_kernel”.

+
+
+ +
+
+openeo.processes.apply_neighborhood(data, process, size, overlap=<object object>, context=<object object>)[source]
+

Apply a process to pixels in a n-dimensional neighborhood

+
+
Parameters:
+
    +
  • data – A raster data cube.

  • +
  • process – Process to be applied on all neighborhoods.

  • +
  • size – Neighborhood sizes along each dimension. This object maps dimension names to either a +physical measure (e.g. 100 m, 10 days) or pixels (e.g. 32 pixels). For dimensions not specified, the +default is to provide all values. Be aware that including all values from overly large dimensions may not +be processed at once.

  • +
  • overlap – Overlap of neighborhoods along each dimension to avoid border effects. By default no +overlap is provided. For instance a temporal dimension can add 1 month before and after a neighborhood. In +the spatial dimensions, this is often a number of pixels. The overlap specified is added before and after, +so an overlap of 8 pixels will add 8 pixels on both sides of the window, so 16 in total. Be aware that +large overlaps increase the need for computational resources and modifying overlapping data in subsequent +operations has no effect.

  • +
  • context – Additional data to be passed to the process.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A raster data cube with the newly computed values and the same dimensions. The dimension +properties (name, type, labels, reference system and resolution) remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “apply_neighborhood”.

+
+
+ +
+
+openeo.processes.apply_polygon(data, polygons, process, mask_value=<object object>, context=<object object>)[source]
+

Apply a process to segments of the data cube

+
+
Parameters:
+
    +
  • data – A data cube.

  • +
  • polygons – A vector data cube containing at least one polygon. The provided vector data can be one of +the following: * A Polygon or MultiPolygon geometry, * a Feature with a Polygon or MultiPolygon +geometry, or * a FeatureCollection containing at least one Feature with Polygon or MultiPolygon +geometries. * Empty geometries are ignored.

  • +
  • process – A process that accepts and returns a single data cube and is applied on each individual sub +data cube. The process may consist of multiple sub-processes.

  • +
  • mask_value – All pixels for which the point at the pixel center does not intersect with the +polygon are replaced with the given value, which defaults to null (no data). It can provide a +distinction between no data values within the polygon and masked pixels outside of it.

  • +
  • context – Additional data to be passed to the process.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube with the newly computed values and the same dimensions. The dimension properties +(name, type, labels, reference system and resolution) remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “apply_polygon”.

+
+
+ +
+
+openeo.processes.arccos(x)[source]
+

Inverse cosine

+
+
Parameters:
+

x – A number.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed angle in radians.

+
+
+
+

See also

+

openeo.org documentation on process “arccos”.

+
+
+ +
+
+openeo.processes.arcosh(x)[source]
+

Inverse hyperbolic cosine

+
+
Parameters:
+

x – A number.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed angle in radians.

+
+
+
+

See also

+

openeo.org documentation on process “arcosh”.

+
+
+ +
+
+openeo.processes.arcsin(x)[source]
+

Inverse sine

+
+
Parameters:
+

x – A number.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed angle in radians.

+
+
+
+

See also

+

openeo.org documentation on process “arcsin”.

+
+
+ +
+
+openeo.processes.arctan(x)[source]
+

Inverse tangent

+
+
Parameters:
+

x – A number.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed angle in radians.

+
+
+
+

See also

+

openeo.org documentation on process “arctan”.

+
+
+ +
+
+openeo.processes.arctan2(y, x)[source]
+

Inverse tangent of two numbers

+
+
Parameters:
+
    +
  • y – A number to be used as the dividend.

  • +
  • x – A number to be used as the divisor.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed angle in radians.

+
+
+
+

See also

+

openeo.org documentation on process “arctan2”.

+
+
+ +
+
+openeo.processes.ard_normalized_radar_backscatter(data, elevation_model=<object object>, contributing_area=<object object>, ellipsoid_incidence_angle=<object object>, noise_removal=<object object>, options=<object object>)[source]
+

CARD4L compliant SAR NRB generation

+
+
Parameters:
+
    +
  • data – The source data cube containing SAR input.

  • +
  • elevation_model – The digital elevation model to use. Set to null (the default) to allow the back- +end to choose, which will improve portability, but reduce reproducibility.

  • +
  • contributing_area – If set to true, a DEM-based local contributing area band named +contributing_area is added. The values are given in square meters.

  • +
  • ellipsoid_incidence_angle – If set to true, an ellipsoidal incidence angle band named +ellipsoid_incidence_angle is added. The values are given in degrees.

  • +
  • noise_removal – If set to false, no noise removal is applied. Defaults to true, which removes +noise.

  • +
  • options – Proprietary options for the backscatter computations. Specifying proprietary options will +reduce portability.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Backscatter values expressed as gamma0 in linear scale. In addition to the bands +contributing_area and ellipsoid_incidence_angle that can optionally be added with corresponding +parameters, the following bands are always added to the data cube: - mask: A data mask that indicates +which values are valid (1), invalid (0) or contain no-data (null). - local_incidence_angle: A band with +DEM-based local incidence angles in degrees. The data returned is CARD4L compliant with corresponding +metadata.

+
+
+
+

See also

+

openeo.org documentation on process “ard_normalized_radar_backscatter”.

+
+
+ +
+
+openeo.processes.ard_surface_reflectance(data, atmospheric_correction_method, cloud_detection_method, elevation_model=<object object>, atmospheric_correction_options=<object object>, cloud_detection_options=<object object>)[source]
+

CARD4L compliant Surface Reflectance generation

+
+
Parameters:
+
    +
  • data – The source data cube containing multi-spectral optical top of the atmosphere (TOA) +reflectances. There must be a single dimension of type bands available.

  • +
  • atmospheric_correction_method – The atmospheric correction method to use.

  • +
  • cloud_detection_method – The cloud detection method to use. Each method supports detecting different +atmospheric disturbances such as clouds, cloud shadows, aerosols, haze, ozone and/or water vapour in +optical imagery.

  • +
  • elevation_model – The digital elevation model to use. Set to null (the default) to allow the back- +end to choose, which will improve portability, but reduce reproducibility.

  • +
  • atmospheric_correction_options – Proprietary options for the atmospheric correction method. +Specifying proprietary options will reduce portability.

  • +
  • cloud_detection_options – Proprietary options for the cloud detection method. Specifying proprietary +options will reduce portability.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Data cube containing bottom of atmosphere reflectances for each spectral band in the source data +cube, with atmospheric disturbances like clouds and cloud shadows removed. No-data values (null) are +directly set in the bands. Depending on the methods used, several additional bands will be added to the +data cube: Data cube containing bottom of atmosphere reflectances for each spectral band in the source +data cube, with atmospheric disturbances like clouds and cloud shadows removed. Depending on the methods +used, several additional bands will be added to the data cube: - date (optional): Specifies per-pixel +acquisition timestamps. - incomplete-testing (required): Identifies pixels with a value of 1 for which +the per-pixel tests (at least saturation, cloud and cloud shadows, see CARD4L specification for details) +have not all been successfully completed. Otherwise, the value is 0. - saturation (required) / +saturation_{band} (optional): Indicates where pixels in the input spectral bands are saturated (1) or not +(0). If the saturation is given per band, the band names are saturation_{band} with {band} being the +band name from the source data cube. - cloud, shadow (both required), aerosol, haze, ozone, +water_vapor (all optional): Indicates the probability of pixels being an atmospheric disturbance such as +clouds. All bands have values between 0 (clear) and 1, which describes the probability that it is an +atmospheric disturbance. - snow-ice (optional): Points to a file that indicates whether a pixel is +assessed as being snow/ice (1) or not (0). All values describe the probability and must be between 0 and 1. +- land-water (optional): Indicates whether a pixel is assessed as being land (1) or water (0). All values +describe the probability and must be between 0 and 1. - incidence-angle (optional): Specifies per-pixel +incidence angles in degrees. - azimuth (optional): Specifies per-pixel azimuth angles in degrees. 
- sun- +azimuth: (optional): Specifies per-pixel sun azimuth angles in degrees. - sun-elevation (optional): +Specifies per-pixel sun elevation angles in degrees. - terrain-shadow (optional): Indicates with a value +of 1 whether a pixel is not directly illuminated due to terrain shadowing. Otherwise, the value is 0. - +terrain-occlusion (optional): Indicates with a value of 1 whether a pixel is not visible to the sensor +due to terrain occlusion during off-nadir viewing. Otherwise, the value is 0. - terrain-illumination +(optional): Contains coefficients used for terrain illumination correction are provided for each pixel. +The data returned is CARD4L compliant with corresponding metadata.

+
+
+
+

See also

+

openeo.org documentation on process “ard_surface_reflectance”.

+
+
+ +
+
+openeo.processes.array_append(data, value, label=<object object>)[source]
+

Append a value to an array

+
+
Parameters:
+
    +
  • data – An array.

  • +
  • value – Value to append to the array.

  • +
  • label – If the given array is a labeled array, a new label for the new value should be given. If not +given or null, the array index as string is used as the label. If in any case the label exists, a +LabelExists exception is thrown.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The new array with the value being appended.

+
+
+
+

See also

+

openeo.org documentation on process “array_append”.

+
+
+ +
+
+openeo.processes.array_apply(data, process, context=<object object>)[source]
+

Apply a process to each array element

+
+
Parameters:
+
    +
  • data – An array.

  • +
  • process – A process that accepts and returns a single value and is applied on each individual value +in the array. The process may consist of multiple sub-processes and could, for example, consist of +processes such as absolute() or linear_scale_range().

  • +
  • context – Additional data to be passed to the process.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

An array with the newly computed values. The number of elements is the same as in the original +array.

+
+
+
+

See also

+

openeo.org documentation on process “array_apply”.

+
+
+ +
+
+openeo.processes.array_concat(array1, array2)[source]
+

Merge two arrays

+
+
Parameters:
+
    +
  • array1 – The first array.

  • +
  • array2 – The second array.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The merged array.

+
+
+
+

See also

+

openeo.org documentation on process “array_concat”.

+
+
+ +
+
+openeo.processes.array_contains(data, value)[source]
+

Check whether the array contains a given value

+
+
Parameters:
+
    +
  • data – List to find the value in.

  • +
  • value – Value to find in data. If the value is null, this process always returns false.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

true if the list contains the value, false otherwise.

+
+
+
+

See also

+

openeo.org documentation on process “array_contains”.

+
+
+ +
+
+openeo.processes.array_create(data=<object object>, repeat=<object object>)[source]
+

Create an array

+
+
Parameters:
+
    +
  • data – A (native) array to fill the newly created array with. Defaults to an empty array.

  • +
  • repeat – The number of times the (native) array specified in data is repeatedly added after each +other to the new array being created. Defaults to 1.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The newly created array.

+
+
+
+

See also

+

openeo.org documentation on process “array_create”.

+
+
+ +
+
+openeo.processes.array_create_labeled(data, labels)[source]
+

Create a labeled array

+
+
Parameters:
+
    +
  • data – An array of values to be used.

  • +
  • labels – An array of labels to be used.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The newly created labeled array.

+
+
+
+

See also

+

openeo.org documentation on process “array_create_labeled”.

+
+
+ +
+
+openeo.processes.array_element(data, index=<object object>, label=<object object>, return_nodata=<object object>)[source]
+

Get an element from an array

+
+
Parameters:
+
    +
  • data – An array.

  • +
  • index – The zero-based index of the element to retrieve.

  • +
  • label – The label of the element to retrieve. Throws an ArrayNotLabeled exception, if the given +array is not a labeled array and this parameter is set.

  • +
  • return_nodata – By default this process throws an ArrayElementNotAvailable exception if the index +or label is invalid. If you want to return null instead, set this flag to true.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The value of the requested element.

+
+
+
+

See also

+

openeo.org documentation on process “array_element”.

+
+
+ +
+
+openeo.processes.array_filter(data, condition, context=<object object>)[source]
+

Filter an array based on a condition

+
+
Parameters:
+
    +
  • data – An array.

  • +
  • condition – A condition that is evaluated against each value, index and/or label in the array. Only +the array elements for which the condition returns true are preserved.

  • +
  • context – Additional data to be passed to the condition.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

An array filtered by the specified condition. The number of elements is less than or equal +to that of the original array.

+
+
+
+

See also

+

openeo.org documentation on process “array_filter”.

+
+
+ +
+
+openeo.processes.array_find(data, value, reverse=<object object>)[source]
+

Get the index for a value in an array

+
+
Parameters:
+
    +
  • data – List to find the value in.

  • +
  • value – Value to find in data. If the value is null, this process always returns null.

  • +
  • reverse – By default, this process finds the index of the first match. To return the index of the +last match instead, set this flag to true.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The index of the first element with the specified value. If no element was found, null is +returned.

+
+
+
+

See also

+

openeo.org documentation on process “array_find”.

+
+
+ +
+
+openeo.processes.array_find_label(data, label)[source]
+

Get the index for a label in a labeled array

+
+
Parameters:
+
    +
  • data – List to find the label in.

  • +
  • label – Label to find in data.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The index of the element with the specified label assigned. If no such label was found, null is +returned.

+
+
+
+

See also

+

openeo.org documentation on process “array_find_label”.

+
+
+ +
+
+openeo.processes.array_interpolate_linear(data)[source]
+

One-dimensional linear interpolation for arrays

+
+
Parameters:
+

data – An array of numbers and no-data values. If the given array is a labeled array, the labels +must have a natural/inherent label order and the process expects the labels to be sorted accordingly. This +is the default behavior in openEO for spatial and temporal dimensions.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

An array with no-data values being replaced with interpolated values. If not at least 2 numerical +values are available in the array, the array stays the same.

+
+
+
+

See also

+

openeo.org documentation on process “array_interpolate_linear”.

+
+
+ +
+
+openeo.processes.array_labels(data)[source]
+

Get the labels for an array

+
+
Parameters:
+

data – An array.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The labels or indices as array.

+
+
+
+

See also

+

openeo.org documentation on process “array_labels”.

+
+
+ +
+
+openeo.processes.array_modify(data, values, index, length=<object object>)[source]
+

Change the content of an array (remove, insert, update)

+
+
Parameters:
+
    +
  • data – The array to modify.

  • +
  • values – The values to insert into the data array.

  • +
  • index – The index in the data array of the element to insert the value(s) before. If the index is +greater than the number of elements in the data array, the process throws an ArrayElementNotAvailable +exception. To insert after the last element, there are two options: 1. Use the simpler processes +array_append() to append a single value or array_concat() to append multiple values. 2. Specify the +number of elements in the array. You can retrieve the number of elements with the process count(), +having the parameter condition set to true.

  • +
  • length – The number of elements in the data array to remove (or replace) starting from the given +index. If the array contains fewer elements, the process simply removes all elements up to the end.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

An array with values added, updated or removed.

+
+
+
+

See also

+

openeo.org documentation on process “array_modify”.

+
+
+ +
+
+openeo.processes.arsinh(x)[source]
+

Inverse hyperbolic sine

+
+
Parameters:
+

x – A number.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed angle in radians.

+
+
+
+

See also

+

openeo.org documentation on process “arsinh”.

+
+
+ +
+
+openeo.processes.artanh(x)[source]
+

Inverse hyperbolic tangent

+
+
Parameters:
+

x – A number.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed angle in radians.

+
+
+
+

See also

+

openeo.org documentation on process “artanh”.

+
+
+ +
+
+openeo.processes.atmospheric_correction(data, method, elevation_model=<object object>, options=<object object>)[source]
+

Apply atmospheric correction

+
+
Parameters:
+
    +
  • data – Data cube containing multi-spectral optical top of atmosphere reflectances to be corrected.

  • +
  • method – The atmospheric correction method to use. To get reproducible results, you have to set a +specific method. Set to null to allow the back-end to choose, which will improve portability, but reduce +reproducibility as you may get different results if you run the processes multiple times.

  • +
  • elevation_model – The digital elevation model to use. Set to null (the default) to allow the back- +end to choose, which will improve portability, but reduce reproducibility.

  • +
  • options – Proprietary options for the atmospheric correction method. Specifying proprietary options +will reduce portability.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Data cube containing bottom of atmosphere reflectances.

+
+
+
+

See also

+

openeo.org documentation on process “atmospheric_correction”.

+
+
+ +
+
+openeo.processes.between(x, min, max, exclude_max=<object object>)[source]
+

Between comparison

+
+
Parameters:
+
    +
  • x – The value to check.

  • +
  • min – Lower boundary (inclusive) to check against.

  • +
  • max – Upper boundary (inclusive) to check against.

  • +
  • exclude_max – Exclude the upper boundary max if set to true. Defaults to false.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

true if x is between the specified bounds, otherwise false.

+
+
+
+

See also

+

openeo.org documentation on process “between”.

+
+
+ +
+
+openeo.processes.ceil(x)[source]
+

Round fractions up

+
+
Parameters:
+

x – A number to round up.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The number rounded up.

+
+
+
+

See also

+

openeo.org documentation on process “ceil”.

+
+
+ +
+
+openeo.processes.climatological_normal(data, period, climatology_period=<object object>)[source]
+

Compute climatology normals

+
+
Parameters:
+
    +
  • data – A data cube with exactly one temporal dimension. The data cube must span at least the temporal +interval specified in the parameter climatology-period. Seasonal periods may span two consecutive years, +e.g. temporal winter that includes months December, January and February. If the required months before the +actual climate period are available, the season is taken into account. If not available, the first season +is not taken into account and the seasonal mean is based on one year less than the other seasonal normals. +The incomplete season at the end of the last year is never taken into account.

  • +
  • period – The time intervals to aggregate the average value for. The following pre-defined frequencies +are supported: * day: Day of the year * month: Month of the year * climatology-period: The period +specified in the climatology-period. * season: Three month periods of the calendar seasons (December - +February, March - May, June - August, September - November). * tropical-season: Six month periods of the +tropical seasons (November - April, May - October).

  • +
  • climatology_period – The climatology period as a closed temporal interval. The first element of the +array is the first year to be fully included in the temporal interval. The second element is the last year +to be fully included in the temporal interval. The default climatology period is from 1981 until 2010 +(both inclusive) right now, but this might be updated over time to what is commonly used in climatology. If +you want to keep your research reproducible, please explicitly specify a period.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube with the same dimensions. The dimension properties (name, type, labels, reference +system and resolution) remain unchanged, except for the resolution and dimension labels of the temporal +dimension. The temporal dimension has the following dimension labels: * day: 001 - 365 * month: +01 - 12 * climatology-period: climatology-period * season: djf (December - February), mam +(March - May), jja (June - August), son (September - November) * tropical-season: ndjfma (November +- April), mjjaso (May - October)

+
+
+
+

See also

+

openeo.org documentation on process “climatological_normal”.

+
+
+ +
+
+openeo.processes.clip(x, min, max)[source]
+

Clip a value between a minimum and a maximum

+
+
Parameters:
+
    +
  • x – A number.

  • +
  • min – Minimum value. If the value is lower than this value, the process will return the value of this +parameter.

  • +
  • max – Maximum value. If the value is greater than this value, the process will return the value of +this parameter.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The value clipped to the specified range.

+
+
+
+

See also

+

openeo.org documentation on process “clip”.

+
+
+ +
+
+openeo.processes.cloud_detection(data, method, options=<object object>)[source]
+

Create cloud masks

+
+
Parameters:
+
    +
  • data – The source data cube containing multi-spectral optical top of the atmosphere (TOA) +reflectances on which to perform cloud detection.

  • +
  • method – The cloud detection method to use. To get reproducible results, you have to set a specific +method. Set to null to allow the back-end to choose, which will improve portability, but reduce +reproducibility as you may get different results if you run the processes multiple times.

  • +
  • options – Proprietary options for the cloud detection method. Specifying proprietary options will +reduce portability.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube with bands for the atmospheric disturbances. Each of the masks contains values between +0 and 1. The data cube has the same spatial and temporal dimensions as the source data cube and a dimension +that contains a dimension label for each of the supported/considered atmospheric disturbances.

+
+
+
+

See also

+

openeo.org documentation on process “cloud_detection”.

+
+
+ +
+
+openeo.processes.constant(x)[source]
+

Define a constant value

+
+
Parameters:
+

x – The value of the constant.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The value of the constant.

+
+
+
+

See also

+

openeo.org documentation on process “constant”.

+
+
+ +
+
+openeo.processes.cos(x)[source]
+

Cosine

+
+
Parameters:
+

x – An angle in radians.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed cosine of x.

+
+
+
+

See also

+

openeo.org documentation on process “cos”.

+
+
+ +
+
+openeo.processes.cosh(x)[source]
+

Hyperbolic cosine

+
+
Parameters:
+

x – An angle in radians.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed hyperbolic cosine of x.

+
+
+
+

See also

+

openeo.org documentation on process “cosh”.

+
+
+ +
+
+openeo.processes.count(data, condition=<object object>, context=<object object>)[source]
+

Count the number of elements

+
+
Parameters:
+
    +
  • data – An array with elements of any data type.

  • +
  • condition – A condition consists of one or more processes, which in the end return a boolean value. +It is evaluated against each element in the array. An element is counted only if the condition returns +true. Defaults to count valid elements in a list (see is_valid()). Setting this parameter to boolean +true counts all elements in the list. false is not a valid value for this parameter.

  • +
  • context – Additional data to be passed to the condition.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The counted number of elements.

+
+
+
+

See also

+

openeo.org documentation on process “count”.

+
+
+ +
+
+openeo.processes.create_data_cube()[source]
+

Create an empty data cube

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

An empty data cube with no dimensions.

+
+
+
+

See also

+

openeo.org documentation on process “create_data_cube”.

+
+
+ +
+
+openeo.processes.cummax(data, ignore_nodata=<object object>)[source]
+

Cumulative maxima

+
+
Parameters:
+
    +
  • data – An array of numbers.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not and ignores them by default. +Setting this flag to false considers no-data values so that null is set for all the following elements.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

An array with the computed cumulative maxima.

+
+
+
+

See also

+

openeo.org documentation on process “cummax”.

+
+
+ +
+
+openeo.processes.cummin(data, ignore_nodata=<object object>)[source]
+

Cumulative minima

+
+
Parameters:
+
    +
  • data – An array of numbers.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not and ignores them by default. +Setting this flag to false considers no-data values so that null is set for all the following elements.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

An array with the computed cumulative minima.

+
+
+
+

See also

+

openeo.org documentation on process “cummin”.

+
+
+ +
+
+openeo.processes.cumproduct(data, ignore_nodata=<object object>)[source]
+

Cumulative products

+
+
Parameters:
+
    +
  • data – An array of numbers.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not and ignores them by default. +Setting this flag to false considers no-data values so that null is set for all the following elements.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

An array with the computed cumulative products.

+
+
+
+

See also

+

openeo.org documentation on process “cumproduct”.

+
+
+ +
+
+openeo.processes.cumsum(data, ignore_nodata=<object object>)[source]
+

Cumulative sums

+
+
Parameters:
+
    +
  • data – An array of numbers.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not and ignores them by default. +Setting this flag to false considers no-data values so that null is set for all the following elements.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

An array with the computed cumulative sums.

+
+
+
+

See also

+

openeo.org documentation on process “cumsum”.

+
+
+ +
+
+openeo.processes.date_between(x, min, max, exclude_max=<object object>)[source]
+

Between comparison for dates and times

+
+
Parameters:
+
    +
  • x – The value to check.

  • +
  • min – Lower boundary (inclusive) to check against.

  • +
  • max – Upper boundary (inclusive) to check against.

  • +
  • exclude_max – Exclude the upper boundary max if set to true. Defaults to false.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

true if x is between the specified bounds, otherwise false.

+
+
+
+

See also

+

openeo.org documentation on process “date_between”.

+
+
+ +
+
+openeo.processes.date_difference(date1, date2, unit=<object object>)[source]
+

Computes the difference between two time instants

+
+
Parameters:
+
    +
  • date1 – The base date, optionally with a time component.

  • +
  • date2 – The other date, optionally with a time component.

  • +
  • unit – The unit for the returned value. The following units are available: - millisecond - second - +leap seconds are ignored in computations. - minute - hour - day - month - year

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Returns the difference between date1 and date2 in the given unit (seconds by default), including a +fractional part if required. For comparison purposes this means: - If date1 < date2, the returned +value is positive. - If date1 = date2, the returned value is 0. - If date1 > date2, the returned +value is negative.

+
+
+
+

See also

+

openeo.org documentation on process “date_difference”.

+
+
+ +
+
+openeo.processes.date_shift(date, value, unit)[source]
+

Manipulates dates and times by addition or subtraction

+
+
Parameters:
+
    +
  • date – The date (and optionally time) to manipulate. If the given date doesn’t include the time, the +process assumes that the time component is 00:00:00Z (i.e. midnight, in UTC). The millisecond part of the +time is optional and defaults to 0 if not given.

  • +
  • value – The period of time in the unit given that is added (positive numbers) or subtracted (negative +numbers). The value 0 doesn’t have any effect.

  • +
  • unit – The unit for the value given. The following pre-defined units are available: - millisecond: +Milliseconds - second: Seconds - leap seconds are ignored in computations. - minute: Minutes - hour: Hours +- day: Days - changes only the day part of a date - week: Weeks (equivalent to 7 days) - month: Months +- year: Years Manipulations with the unit year, month, week or day never change the time. If +any of the manipulations result in an invalid date or time, the corresponding part is rounded down to the +next valid date or time respectively. For example, adding a month to 2020-01-31 would result in +2020-02-29.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The manipulated date. If a time component was given in the parameter date, the time component is +returned with the date.

+
+
+
+

See also

+

openeo.org documentation on process “date_shift”.

+
+
+ +
+
+openeo.processes.dimension_labels(data, dimension)[source]
+

Get the dimension labels

+
+
Parameters:
+
    +
  • data – The data cube.

  • +
  • dimension – The name of the dimension to get the labels for.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The labels as an array.

+
+
+
+

See also

+

openeo.org documentation on process “dimension_labels”.

+
+
+ +
+
+openeo.processes.divide(x, y)[source]
+

Division of two numbers

+
+
Parameters:
+
    +
  • x – The dividend.

  • +
  • y – The divisor.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed result.

+
+
+
+

See also

+

openeo.org documentation on process “divide”.

+
+
+ +
+
+openeo.processes.drop_dimension(data, name)[source]
+

Remove a dimension

+
+
Parameters:
+
    +
  • data – The data cube to drop a dimension from.

  • +
  • name – Name of the dimension to drop.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube without the specified dimension. The number of dimensions decreases by one, but the +dimension properties (name, type, labels, reference system and resolution) for all other dimensions remain +unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “drop_dimension”.

+
+
+ +
+
+openeo.processes.e()[source]
+

Euler’s number (e)

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The numerical value of Euler’s number.

+
+
+
+

See also

+

openeo.org documentation on process “e”.

+
+
+ +
+
+openeo.processes.eq(x, y, delta=<object object>, case_sensitive=<object object>)[source]
+

Equal to comparison

+
+
Parameters:
+
    +
  • x – First operand.

  • +
  • y – Second operand.

  • +
  • delta – Only applicable for comparing two numbers. If this optional parameter is set to a positive +non-zero number the equality of two numbers is checked against a delta value. This is especially useful to +circumvent problems with floating-point inaccuracy in machine-based computation. This option is basically +an alias for the following computation: lte(abs(minus([x, y])), delta)

  • +
  • case_sensitive – Only applicable for comparing two strings. Case sensitive comparison can be disabled +by setting this parameter to false.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

true if x is equal to y, null if any operand is null, otherwise false.

+
+
+
+

See also

+

openeo.org documentation on process “eq”.

+
+
+ +
+
+openeo.processes.exp(p)[source]
+

Exponentiation to the base e

+
+
Parameters:
+

p – The numerical exponent.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed value for e raised to the power of p.

+
+
+
+

See also

+

openeo.org documentation on process “exp”.

+
+
+ +
+
+openeo.processes.extrema(data, ignore_nodata=<object object>)[source]
+

Minimum and maximum values

+
+
Parameters:
+
    +
  • data – An array of numbers.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not. Ignores them by default. Setting +this flag to false considers no-data values so that an array with two null values is returned if any +value is such a value.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

An array containing the minimum and maximum values for the specified numbers. The first element is +the minimum, the second element is the maximum. If the input array is empty both elements are set to +null.

+
+
+
+

See also

+

openeo.org documentation on process “extrema”.

+
+
+ +
+
+openeo.processes.filter_bands(data, bands=<object object>, wavelengths=<object object>)[source]
+

Filter the bands by names

+
+
Parameters:
+
    +
  • data – A data cube with bands.

  • +
  • bands – A list of band names. Either the unique band name (metadata field name in bands) or one of +the common band names (metadata field common_name in bands). If the unique band name and the common name +conflict, the unique band name has a higher priority. The order of the specified array defines the order +of the bands in the data cube. If multiple bands match a common name, all matched bands are included in the +original order.

  • +
  • wavelengths – A list of sub-lists with each sub-list consisting of two elements. The first element is +the minimum wavelength and the second element is the maximum wavelength. Wavelengths are specified in +micrometers (μm). The order of the specified array defines the order of the bands in the data cube. If +multiple bands match the wavelengths, all matched bands are included in the original order.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube limited to a subset of its original bands. The dimensions and dimension properties +(name, type, labels, reference system and resolution) remain unchanged, except that the dimension of type +bands has fewer (or the same) dimension labels.

+
+
+
+

See also

+

openeo.org documentation on process “filter_bands”.

+
+
+ +
+
+openeo.processes.filter_bbox(data, extent)[source]
+

Spatial filter using a bounding box

+
+
Parameters:
+
    +
  • data – A data cube.

  • +
  • extent – A bounding box, which may include a vertical axis (see base and height).

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube restricted to the bounding box. The dimensions and dimension properties (name, type, +labels, reference system and resolution) remain unchanged, except that the spatial dimensions have fewer (or +the same) dimension labels.

+
+
+
+

See also

+

openeo.org documentation on process “filter_bbox”.

+
+
+ +
+
+openeo.processes.filter_labels(data, condition, dimension, context=<object object>)[source]
+

Filter dimension labels based on a condition

+
+
Parameters:
+
    +
  • data – A data cube.

  • +
  • condition – A condition that is evaluated against each dimension label in the specified dimension. A +dimension label and the corresponding data is preserved for the given dimension, if the condition returns +true.

  • +
  • dimension – The name of the dimension to filter on. Fails with a DimensionNotAvailable exception if +the specified dimension does not exist.

  • +
  • context – Additional data to be passed to the condition.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube with the same dimensions. The dimension properties (name, type, labels, reference +system and resolution) remain unchanged, except that the given dimension has fewer (or the same) dimension +labels.

+
+
+
+

See also

+

openeo.org documentation on process “filter_labels”.

+
+
+ +
+
+openeo.processes.filter_spatial(data, geometries)[source]
+

Spatial filter raster data cubes using geometries

+
+
Parameters:
+
    +
  • data – A raster data cube.

  • +
  • geometries – One or more geometries used for filtering, given as GeoJSON or vector data cube. If +multiple geometries are provided, the union of them is used. Empty geometries are ignored. Limits the data +cube to the bounding box of the given geometries. No implicit masking gets applied. To mask the pixels of +the data cube use mask_polygon().

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A raster data cube restricted to the specified geometries. The dimensions and dimension properties +(name, type, labels, reference system and resolution) remain unchanged, except that the spatial dimensions +have fewer (or the same) dimension labels.

+
+
+
+

See also

+

openeo.org documentation on process “filter_spatial”.

+
+
+ +
+
+openeo.processes.filter_temporal(data, extent, dimension=<object object>)[source]
+

Temporal filter based on temporal intervals

+
+
Parameters:
+
    +
  • data – A data cube.

  • +
  • extent – Left-closed temporal interval, i.e. an array with exactly two elements: 1. The first +element is the start of the temporal interval. The specified time instant is included in the interval. +2. The second element is the end of the temporal interval. The specified time instant is excluded from +the interval. The second element must always be greater/later than the first element. Otherwise, a +TemporalExtentEmpty exception is thrown. Also supports unbounded intervals by setting one of the +boundaries to null, but never both.

  • +
  • dimension – The name of the temporal dimension to filter on. If no specific dimension is specified, +the filter applies to all temporal dimensions. Fails with a DimensionNotAvailable exception if the +specified dimension does not exist.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube restricted to the specified temporal extent. The dimensions and dimension properties +(name, type, labels, reference system and resolution) remain unchanged, except that the temporal dimensions +(determined by the dimension parameter) may have fewer dimension labels.

+
+
+
+

See also

+

openeo.org documentation on process “filter_temporal”.

+
+
+ +
+
+openeo.processes.filter_vector(data, geometries, relation=<object object>)[source]
+

Spatial vector filter using geometries

+
+
Parameters:
+
    +
  • data – A vector data cube with the candidate geometries.

  • +
  • geometries – One or more base geometries used for filtering, given as vector data cube. If multiple +base geometries are provided, the union of them is used.

  • +
  • relation – The spatial filter predicate for comparing the geometries provided through (a) +geometries (base geometries) and (b) data (candidate geometries).

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A vector data cube restricted to the specified geometries. The dimensions and dimension properties +(name, type, labels, reference system and resolution) remain unchanged, except that the geometries +dimension has fewer (or the same) dimension labels.

+
+
+
+

See also

+

openeo.org documentation on process “filter_vector”.

+
+
+ +
+
+openeo.processes.first(data, ignore_nodata=<object object>)[source]
+

First element

+
+
Parameters:
+
    +
  • data – An array with elements of any data type.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not. Ignores them by default. Setting +this flag to false considers no-data values so that null is returned if the first value is such a +value.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The first element of the input array.

+
+
+
+

See also

+

openeo.org documentation on process “first”.

+
+
+ +
+
+openeo.processes.fit_curve(data, parameters, function, ignore_nodata=<object object>)[source]
+

Curve fitting

+
+
Parameters:
+
    +
  • data – A labeled array, the labels correspond to the variable y and the values correspond to the +variable x.

  • +
  • parameters – Defines the number of parameters for the model function and provides an initial guess +for them. At least one parameter is required.

  • +
  • function – The model function. It must take the parameters to fit as array through the first argument +and the independent variable x as the second argument. It is recommended to store the model function as +a user-defined process on the back-end to be able to re-use the model function with the computed optimal +values for the parameters afterwards.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not. Ignores them by default. Setting +this flag to false considers no-data values so that null is passed to the model function.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

An array with the optimal values for the parameters.

+
+
+
+

See also

+

openeo.org documentation on process “fit_curve”.

+
+
+ +
+
+openeo.processes.flatten_dimensions(data, dimensions, target_dimension, label_separator=<object object>)[source]
+

Combine multiple dimensions into a single dimension

+
+
Parameters:
+
    +
  • data – A data cube.

  • +
  • dimensions – The names of the dimensions to combine. The order of the array defines the order in which +the dimension labels and values are combined (see the example in the process description). Fails with a +DimensionNotAvailable exception if at least one of the specified dimensions does not exist.

  • +
  • target_dimension – The name of the new target dimension. A new dimension will be created with the +given name and type other (see add_dimension()). Fails with a TargetDimensionExists exception if a +dimension with the specified name exists.

  • +
  • label_separator – The string that will be used as a separator for the concatenated dimension labels. +To unambiguously revert the dimension labels with the process unflatten_dimension(), the given string +must not be contained in any of the dimension labels.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube with the new shape. The dimension properties (name, type, labels, reference system and +resolution) for all other dimensions remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “flatten_dimensions”.

+
+
+ +
+
+openeo.processes.floor(x)[source]
+

Round fractions down

+
+
Parameters:
+

x – A number to round down.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The number rounded down.

+
+
+
+

See also

+

openeo.org documentation on process “floor”.

+
+
+ +
+
+openeo.processes.gt(x, y)[source]
+

Greater than comparison

+
+
Parameters:
+
    +
  • x – First operand.

  • +
  • y – Second operand.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

true if x is strictly greater than y, null if any operand is null, otherwise false.

+
+
+
+

See also

+

openeo.org documentation on process “gt”.

+
+
+ +
+
+openeo.processes.gte(x, y)[source]
+

Greater than or equal to comparison

+
+
Parameters:
+
    +
  • x – First operand.

  • +
  • y – Second operand.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

true if x is greater than or equal to y, null if any operand is null, otherwise false.

+
+
+
+

See also

+

openeo.org documentation on process “gte”.

+
+
+ +
+
+openeo.processes.if_(value, accept, reject=<object object>)[source]
+

If-Then-Else conditional

+
+
Parameters:
+
    +
  • value – A boolean value.

  • +
  • accept – A value that is returned if the boolean value is true.

  • +
  • reject – A value that is returned if the boolean value is not true. Defaults to null.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Either the accept or reject argument depending on the given boolean value.

+
+
+
+

See also

+

openeo.org documentation on process “if_”.

+
+
+ +
+
+openeo.processes.inspect(data, message=<object object>, code=<object object>, level=<object object>)[source]
+

Add information to the logs

+
+
Parameters:
+
    +
  • data – Data to log.

  • +
  • message – A message to send in addition to the data.

  • +
  • code – A label to help identify one or more log entries originating from this process in the list of +all log entries. It can help to group or filter log entries and is usually not unique.

  • +
  • level – The severity level of this message, defaults to info.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The data as passed to the data parameter without any modification.

+
+
+
+

See also

+

openeo.org documentation on process “inspect”.

+
+
+ +
+
+openeo.processes.int(x)[source]
+

Integer part of a number

+
+
Parameters:
+

x – A number.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Integer part of the number.

+
+
+
+

See also

+

openeo.org documentation on process “int”.

+
+
+ +
+
+openeo.processes.is_infinite(x)[source]
+

Value is an infinite number

+
+
Parameters:
+

x – The data to check.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

true if the data is an infinite number, otherwise false.

+
+
+
+

See also

+

openeo.org documentation on process “is_infinite”.

+
+
+ +
+
+openeo.processes.is_nan(x)[source]
+

Value is not a number

+
+
Parameters:
+

x – The data to check.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Returns true for NaN and all non-numeric data types, otherwise returns false.

+
+
+
+

See also

+

openeo.org documentation on process “is_nan”.

+
+
+ +
+
+openeo.processes.is_nodata(x)[source]
+

Value is a no-data value

+
+
Parameters:
+

x – The data to check.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

true if the data is a no-data value, otherwise false.

+
+
+
+

See also

+

openeo.org documentation on process “is_nodata”.

+
+
+ +
+
+openeo.processes.is_valid(x)[source]
+

Value is valid data

+
+
Parameters:
+

x – The data to check.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

true if the data is valid, otherwise false.

+
+
+
+

See also

+

openeo.org documentation on process “is_valid”.

+
+
+ +
+
+openeo.processes.last(data, ignore_nodata=<object object>)[source]
+

Last element

+
+
Parameters:
+
    +
  • data – An array with elements of any data type.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not. Ignores them by default. Setting +this flag to false considers no-data values so that null is returned if the last value is such a value.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The last element of the input array.

+
+
+
+

See also

+

openeo.org documentation on process “last”.

+
+
+ +
+
+openeo.processes.linear_scale_range(x, inputMin, inputMax, outputMin=<object object>, outputMax=<object object>)[source]
+

Linear transformation between two ranges

+
+
Parameters:
+
    +
  • x – A number to transform. The number gets clipped to the bounds specified in inputMin and +inputMax.

  • +
  • inputMin – Minimum value the input can obtain.

  • +
  • inputMax – Maximum value the input can obtain.

  • +
  • outputMin – Minimum value of the desired output range.

  • +
  • outputMax – Maximum value of the desired output range.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The transformed number.

+
+
+
+

See also

+

openeo.org documentation on process “linear_scale_range”.

+
+
+ +
+
+openeo.processes.ln(x)[source]
+

Natural logarithm

+
+
Parameters:
+

x – A number to compute the natural logarithm for.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed natural logarithm.

+
+
+
+

See also

+

openeo.org documentation on process “ln”.

+
+
+ +
+
+openeo.processes.load_collection(id, spatial_extent, temporal_extent, bands=<object object>, properties=<object object>)[source]
+

Load a collection

+
+
Parameters:
+
    +
  • id – The collection id.

  • +
  • spatial_extent – Limits the data to load from the collection to the specified bounding box or +polygons. * For raster data, the process loads the pixel into the data cube if the point at the pixel +center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard +by the OGC). * For vector data, the process loads the geometry into the data cube if the geometry is fully +within the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). +Empty geometries may only be in the data cube if no spatial extent has been provided. The GeoJSON can be +one of the following feature types: * A Polygon or MultiPolygon geometry, * a Feature with a +Polygon or MultiPolygon geometry, or * a FeatureCollection containing at least one Feature with +Polygon or MultiPolygon geometries. * Empty geometries are ignored. Set this parameter to null to +set no limit for the spatial extent. Be careful with this when loading large datasets! It is recommended to +use this parameter instead of using filter_bbox() or filter_spatial() directly after loading +unbounded data.

  • +
  • temporal_extent – Limits the data to load from the collection to the specified left-closed temporal +interval. Applies to all temporal dimensions. The interval has to be specified as an array with exactly two +elements: 1. The first element is the start of the temporal interval. The specified time instant is +included in the interval. 2. The second element is the end of the temporal interval. The specified time +instant is excluded from the interval. The second element must always be greater/later than the first +element. Otherwise, a TemporalExtentEmpty exception is thrown. Also supports unbounded intervals by +setting one of the boundaries to null, but never both. Set this parameter to null to set no limit for +the temporal extent. Be careful with this when loading large datasets! It is recommended to use this +parameter instead of using filter_temporal() directly after loading unbounded data.

  • +
  • bands – Only adds the specified bands into the data cube so that bands that don’t match the list of +band names are not available. Applies to all dimensions of type bands. Either the unique band name +(metadata field name in bands) or one of the common band names (metadata field common_name in bands) +can be specified. If the unique band name and the common name conflict, the unique band name has a higher +priority. The order of the specified array defines the order of the bands in the data cube. If multiple +bands match a common name, all matched bands are included in the original order. It is recommended to use +this parameter instead of using filter_bands() directly after loading unbounded data.

  • +
  • properties – Limits the data by metadata properties to include only data in the data cube which all +given conditions return true for (AND operation). Specify key-value-pairs with the key being the name of +the metadata property, which can be retrieved with the openEO Data Discovery for Collections. The value +must be a condition (user-defined process) to be evaluated against the collection metadata, see the +example.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube for further processing. The dimensions and dimension properties (name, type, labels, +reference system and resolution) correspond to the collection’s metadata, but the dimension labels are +restricted as specified in the parameters.

+
+
+
+

See also

+

openeo.org documentation on process “load_collection”.

+
+
+ +
+
+openeo.processes.load_geojson(data, properties=<object object>)[source]
+

Converts GeoJSON into a vector data cube

+
+
Parameters:
+
    +
  • data – A GeoJSON object to convert into a vector data cube. The GeoJSON type GeometryCollection is +not supported. Each geometry in the GeoJSON data results in a dimension label in the geometries +dimension.

  • +
  • properties – A list of properties from the GeoJSON file to construct an additional dimension from. A +new dimension with the name properties and type other is created if at least one property is provided. +Only applies for GeoJSON Features and FeatureCollections. Missing values are generally set to no-data +(null). Depending on the number of properties provided, the process creates the dimension differently: +- Single property with scalar values: A single dimension label with the name of the property and a single +value per geometry. - Single property of type array: The dimension labels correspond to the array indices. +There are as many values and labels per geometry as there are for the largest array. - Multiple properties +with scalar values: The dimension labels correspond to the property names. There are as many values and +labels per geometry as there are properties provided here.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A vector data cube containing the geometries, either one or two dimensional.

+
+
+
+

See also

+

openeo.org documentation on process “load_geojson”.

+
+
+ +
+
+openeo.processes.load_ml_model(id)[source]
+

Load a ML model

+
+
Parameters:
+

id – The STAC Item to load the machine learning model from. The STAC Item must implement the +ml-model extension.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A machine learning model to be used with machine learning processes such as +predict_random_forest().

+
+
+
+

See also

+

openeo.org documentation on process “load_ml_model”.

+
+
+ +
+
+openeo.processes.load_result(id, spatial_extent=<object object>, temporal_extent=<object object>, bands=<object object>)[source]
+

Load batch job results

+
+
Parameters:
+
    +
  • id – The id of a batch job with results.

  • +
  • spatial_extent – Limits the data to load from the batch job result to the specified bounding box or +polygons. * For raster data, the process loads the pixel into the data cube if the point at the pixel +center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard +by the OGC). * For vector data, the process loads the geometry into the data cube if the geometry is fully +within the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). +Empty geometries may only be in the data cube if no spatial extent has been provided. The GeoJSON can be +one of the following feature types: * A Polygon or MultiPolygon geometry, * a Feature with a +Polygon or MultiPolygon geometry, or * a FeatureCollection containing at least one Feature with +Polygon or MultiPolygon geometries. Set this parameter to null to set no limit for the spatial +extent. Be careful with this when loading large datasets! It is recommended to use this parameter instead +of using filter_bbox() or filter_spatial() directly after loading unbounded data.

  • +
  • temporal_extent – Limits the data to load from the batch job result to the specified left-closed +temporal interval. Applies to all temporal dimensions. The interval has to be specified as an array with +exactly two elements: 1. The first element is the start of the temporal interval. The specified instance +in time is included in the interval. 2. The second element is the end of the temporal interval. The +specified instance in time is excluded from the interval. The specified temporal strings follow [RFC +3339](https://www.rfc-editor.org/rfc/rfc3339.html). Also supports open intervals by setting one of the +boundaries to null, but never both. Set this parameter to null to set no limit for the temporal +extent. Be careful with this when loading large datasets! It is recommended to use this parameter instead +of using filter_temporal() directly after loading unbounded data.

  • +
  • bands – Only adds the specified bands into the data cube so that bands that don’t match the list of +band names are not available. Applies to all dimensions of type bands. Either the unique band name +(metadata field name in bands) or one of the common band names (metadata field common_name in bands) +can be specified. If the unique band name and the common name conflict, the unique band name has a higher +priority. The order of the specified array defines the order of the bands in the data cube. If multiple +bands match a common name, all matched bands are included in the original order. It is recommended to use +this parameter instead of using filter_bands() directly after loading unbounded data.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube for further processing.

+
+
+
+

See also

+

openeo.org documentation on process “load_result”.

+
+
+ +
+
+openeo.processes.load_stac(url, spatial_extent=<object object>, temporal_extent=<object object>, bands=<object object>, properties=<object object>)[source]
+

Loads data from STAC

+
+
Parameters:
+
    +
  • url – The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific +STAC API Collection that allows filtering items and downloading assets. This includes batch job results, +which themselves are compliant with STAC. For external URLs, authentication details such as API keys or tokens +may need to be included in the URL. Batch job results can be specified in two ways: - For batch job +results at the same back-end, a URL pointing to the corresponding batch job results endpoint should be +provided. The URL usually ends with /jobs/{id}/results and {id} is the corresponding batch job ID. - +For external results, a signed URL must be provided. Not all back-ends support signed URLs, which are +provided as a link with the link relation canonical in the batch job result metadata.

  • +
  • spatial_extent – Limits the data to load to the specified bounding box or polygons. * For raster +data, the process loads the pixel into the data cube if the point at the pixel center intersects with the +bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). * For vector +data, the process loads the geometry into the data cube if the geometry is fully within the bounding box or +any of the polygons (as defined in the Simple Features standard by the OGC). Empty geometries may only be +in the data cube if no spatial extent has been provided. The GeoJSON can be one of the following feature +types: * A Polygon or MultiPolygon geometry, * a Feature with a Polygon or MultiPolygon +geometry, or * a FeatureCollection containing at least one Feature with Polygon or MultiPolygon +geometries. Set this parameter to null to set no limit for the spatial extent. Be careful with this when +loading large datasets! It is recommended to use this parameter instead of using filter_bbox() or +filter_spatial() directly after loading unbounded data.

  • +
  • temporal_extent – Limits the data to load to the specified left-closed temporal interval. Applies to +all temporal dimensions. The interval has to be specified as an array with exactly two elements: 1. The +first element is the start of the temporal interval. The specified instance in time is included in the +interval. 2. The second element is the end of the temporal interval. The specified instance in time is +excluded from the interval. The second element must always be greater/later than the first element. +Otherwise, a TemporalExtentEmpty exception is thrown. Also supports open intervals by setting one of the +boundaries to null, but never both. Set this parameter to null to set no limit for the temporal +extent. Be careful with this when loading large datasets! It is recommended to use this parameter instead +of using filter_temporal() directly after loading unbounded data.

  • +
  • bands – Only adds the specified bands into the data cube so that bands that don’t match the list of +band names are not available. Applies to all dimensions of type bands. Either the unique band name +(metadata field name in bands) or one of the common band names (metadata field common_name in bands) +can be specified. If the unique band name and the common name conflict, the unique band name has a higher +priority. The order of the specified array defines the order of the bands in the data cube. If multiple +bands match a common name, all matched bands are included in the original order. It is recommended to use +this parameter instead of using filter_bands() directly after loading unbounded data.

  • +
  • properties – Limits the data by metadata properties to include only data in the data cube which all +given conditions return true for (AND operation). Specify key-value-pairs with the key being the name of +the metadata property, which can be retrieved with the openEO Data Discovery for Collections. The value +must be a condition (user-defined process) to be evaluated against a STAC API. This parameter is not +supported for static STAC.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube for further processing.

+
+
+
+

See also

+

openeo.org documentation on process “load_stac”.

+
+
+ +
+
+openeo.processes.load_uploaded_files(paths, format, options=<object object>)[source]
+

Load files from the user workspace

+
+
Parameters:
+
    +
  • paths – The files to read. Folders can’t be specified, specify all files instead. An exception is +thrown if a file can’t be read.

  • +
  • format – The file format to read from. It must be one of the values that the server reports as +supported input file formats, which usually correspond to the short GDAL/OGR codes. If the format is not +suitable for loading the data, a FormatUnsuitable exception will be thrown. This parameter is case +insensitive.

  • +
  • options – The file format parameters to be used to read the files. Must correspond to the parameters +that the server reports as supported parameters for the chosen format. The parameter names and valid +values usually correspond to the GDAL/OGR format options.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube for further processing.

+
+
+
+

See also

+

openeo.org documentation on process “load_uploaded_files”.

+
+
+ +
+
+openeo.processes.load_url(url, format, options=<object object>)[source]
+

Load data from a URL

+
+
Parameters:
+
    +
  • url – The URL to read from. Authentication details such as API keys or tokens may need to be included +in the URL.

  • +
  • format – The file format to use when loading the data. It must be one of the values that the server +reports as supported input file formats, which usually correspond to the short GDAL/OGR codes. If the +format is not suitable for loading the data, a FormatUnsuitable exception will be thrown. This parameter +is case insensitive.

  • +
  • options – The file format parameters to use when reading the data. Must correspond to the parameters +that the server reports as supported parameters for the chosen format. The parameter names and valid +values usually correspond to the GDAL/OGR format options.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube for further processing.

+
+
+
+

See also

+

openeo.org documentation on process “load_url”.

+
+
+ +
+
+openeo.processes.log(x, base)[source]
+

Logarithm to a base

+
+
Parameters:
+
    +
  • x – A number to compute the logarithm for.

  • +
  • base – The numerical base.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed logarithm.

+
+
+
+

See also

+

openeo.org documentation on process “log”.

+
+
+ +
+
+openeo.processes.lt(x, y)[source]
+

Less than comparison

+
+
Parameters:
+
    +
  • x – First operand.

  • +
  • y – Second operand.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

true if x is strictly less than y, null if any operand is null, otherwise false.

+
+
+
+

See also

+

openeo.org documentation on process “lt”.

+
+
+ +
+
+openeo.processes.lte(x, y)[source]
+

Less than or equal to comparison

+
+
Parameters:
+
    +
  • x – First operand.

  • +
  • y – Second operand.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

true if x is less than or equal to y, null if any operand is null, otherwise false.

+
+
+
+

See also

+

openeo.org documentation on process “lte”.

+
+
+ +
+
+openeo.processes.mask(data, mask, replacement=<object object>)[source]
+

Apply a raster mask

+
+
Parameters:
+
    +
  • data – A raster data cube.

  • +
  • mask – A mask as a raster data cube. Every pixel in data must have a corresponding element in +mask.

  • +
  • replacement – The value used to replace masked values with.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A masked raster data cube with the same dimensions. The dimension properties (name, type, labels, +reference system and resolution) remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “mask”.

+
+
+ +
+
+openeo.processes.mask_polygon(data, mask, replacement=<object object>, inside=<object object>)[source]
+

Apply a polygon mask

+
+
Parameters:
+
    +
  • data – A raster data cube.

  • +
  • mask – A GeoJSON object or a vector data cube containing at least one polygon. The provided vector +data can be one of the following: * A Polygon or MultiPolygon geometry, * a Feature with a Polygon +or MultiPolygon geometry, or * a FeatureCollection containing at least one Feature with Polygon or +MultiPolygon geometries. * Empty geometries are ignored.

  • +
  • replacement – The value used to replace masked values with.

  • +
  • inside – If set to true all pixels for which the point at the pixel center does intersect with +any polygon are replaced.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A masked raster data cube with the same dimensions. The dimension properties (name, type, labels, +reference system and resolution) remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “mask_polygon”.

+
+
+ +
+
+openeo.processes.max(data, ignore_nodata=<object object>)[source]
+

Maximum value

+
+
Parameters:
+
    +
  • data – An array of numbers.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not. Ignores them by default. Setting +this flag to false considers no-data values so that null is returned if any value is such a value.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The maximum value.

+
+
+
+

See also

+

openeo.org documentation on process “max”.

+
+
+ +
+
+openeo.processes.mean(data, ignore_nodata=<object object>)[source]
+

Arithmetic mean (average)

+
+
Parameters:
+
    +
  • data – An array of numbers.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not. Ignores them by default. Setting +this flag to false considers no-data values so that null is returned if any value is such a value.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed arithmetic mean.

+
+
+
+

See also

+

openeo.org documentation on process “mean”.

+
+
+ +
+
+openeo.processes.median(data, ignore_nodata=<object object>)[source]
+

Statistical median

+
+
Parameters:
+
    +
  • data – An array of numbers.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not. Ignores them by default. Setting +this flag to false considers no-data values so that null is returned if any value is such a value.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed statistical median.

+
+
+
+

See also

+

openeo.org documentation on process “median”.

+
+
+ +
+
+openeo.processes.merge_cubes(cube1, cube2, overlap_resolver=<object object>, context=<object object>)[source]
+

Merge two data cubes

+
+
Parameters:
+
    +
  • cube1 – The base data cube.

  • +
  • cube2 – The other data cube to be merged with the base data cube.

  • +
  • overlap_resolver – A reduction operator that resolves the conflict if the data overlaps. The reducer +must return a value of the same data type as the input values. The reduction operator may be a single +process such as multiply() or consist of multiple sub-processes. null (the default) can be specified +if no overlap resolver is required.

  • +
  • context – Additional data to be passed to the overlap resolver.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The merged data cube. See the process description for details regarding the dimensions and +dimension properties (name, type, labels, reference system and resolution).

+
+
+
+

See also

+

openeo.org documentation on process “merge_cubes”.

+
+
+ +
+
+openeo.processes.min(data, ignore_nodata=<object object>)[source]
+

Minimum value

+
+
Parameters:
+
    +
  • data – An array of numbers.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not. Ignores them by default. Setting +this flag to false considers no-data values so that null is returned if any value is such a value.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The minimum value.

+
+
+
+

See also

+

openeo.org documentation on process “min”.

+
+
+ +
+
+openeo.processes.mod(x, y)[source]
+

Modulo

+
+
Parameters:
+
    +
  • x – A number to be used as the dividend.

  • +
  • y – A number to be used as the divisor.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The remainder after division.

+
+
+
+

See also

+

openeo.org documentation on process “mod”.

+
+
+ +
+
+openeo.processes.multiply(x, y)[source]
+

Multiplication of two numbers

+
+
Parameters:
+
    +
  • x – The multiplier.

  • +
  • y – The multiplicand.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed product of the two numbers.

+
+
+
+

See also

+

openeo.org documentation on process “multiply”.

+
+
+ +
+
+openeo.processes.nan()[source]
+

Not a Number (NaN)

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Returns NaN.

+
+
+
+

See also

+

openeo.org documentation on process “nan”.

+
+
+ +
+
+openeo.processes.ndvi(data, nir=<object object>, red=<object object>, target_band=<object object>)[source]
+

Normalized Difference Vegetation Index

+
+
Parameters:
+
    +
  • data – A raster data cube with two bands that have the common names red and nir assigned.

  • +
  • nir – The name of the NIR band. Defaults to the band that has the common name nir assigned. Either +the unique band name (metadata field name in bands) or one of the common band names (metadata field +common_name in bands) can be specified. If the unique band name and the common name conflict, the unique +band name has a higher priority.

  • +
  • red – The name of the red band. Defaults to the band that has the common name red assigned. Either +the unique band name (metadata field name in bands) or one of the common band names (metadata field +common_name in bands) can be specified. If the unique band name and the common name conflict, the unique +band name has a higher priority.

  • +
  • target_band – By default, the dimension of type bands is dropped. To keep the dimension specify a +new band name in this parameter so that a new dimension label with the specified name will be added for the +computed values.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A raster data cube containing the computed NDVI values. The structure of the data cube differs +depending on the value passed to target_band: * target_band is null: The data cube does not contain +the dimension of type bands, the number of dimensions decreases by one. The dimension properties (name, +type, labels, reference system and resolution) for all other dimensions remain unchanged. * target_band +is a string: The data cube keeps the same dimensions. The dimension properties remain unchanged, but the +number of dimension labels for the dimension of type bands increases by one. The additional label is +named as specified in target_band.

+
+
+
+

See also

+

openeo.org documentation on process “ndvi”.

+
+
+ +
+
+openeo.processes.neq(x, y, delta=<object object>, case_sensitive=<object object>)[source]
+

Not equal to comparison

+
+
Parameters:
+
    +
  • x – First operand.

  • +
  • y – Second operand.

  • +
  • delta – Only applicable for comparing two numbers. If this optional parameter is set to a positive +non-zero number the non-equality of two numbers is checked against a delta value. This is especially useful +to circumvent problems with floating-point inaccuracy in machine-based computation. This option is +basically an alias for the following computation: gt(abs(minus([x, y])), delta)

  • +
  • case_sensitive – Only applicable for comparing two strings. Case sensitive comparison can be disabled +by setting this parameter to false.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

true if x is not equal to y, null if any operand is null, otherwise false.

+
+
+
+

See also

+

openeo.org documentation on process “neq”.

+
+
+ +
+
+openeo.processes.normalized_difference(x, y)[source]
+

Normalized difference

+
+
Parameters:
+
    +
  • x – The value for the first band.

  • +
  • y – The value for the second band.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed normalized difference.

+
+
+
+

See also

+

openeo.org documentation on process “normalized_difference”.

+
+
+ +
+
+openeo.processes.not_(x)[source]
+

Inverting a boolean

+
+
Parameters:
+

x – Boolean value to invert.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Inverted boolean value.

+
+
+
+

See also

+

openeo.org documentation on process “not_”.

+
+
+ +
+
+openeo.processes.or_(x, y)[source]
+

Logical OR

+
+
Parameters:
+
    +
  • x – A boolean value.

  • +
  • y – A boolean value.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Boolean result of the logical OR.

+
+
+
+

See also

+

openeo.org documentation on process “or_”.

+
+
+ +
+
+openeo.processes.order(data, asc=<object object>, nodata=<object object>)[source]
+

Get the order of array elements

+
+
Parameters:
+
    +
  • data – An array to compute the order for.

  • +
  • asc – The default sort order is ascending, with smallest values first. To sort in reverse +(descending) order, set this parameter to false.

  • +
  • nodata – Controls the handling of no-data values (null). By default, they are removed. If set to +true, missing values in the data are put last; if set to false, they are put first.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed permutation.

+
+
+
+

See also

+

openeo.org documentation on process “order”.

+
+
+ +
+
+openeo.processes.pi()[source]
+

Pi (π)

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The numerical value of Pi.

+
+
+
+

See also

+

openeo.org documentation on process “pi”.

+
+
+ +
+
+openeo.processes.power(base, p)[source]
+

Exponentiation

+
+
Parameters:
+
    +
  • base – The numerical base.

  • +
  • p – The numerical exponent.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed value for base raised to the power of p.

+
+
+
+

See also

+

openeo.org documentation on process “power”.

+
+
+ +
+
+openeo.processes.predict_curve(parameters, function, dimension, labels=<object object>)[source]
+

Predict values

+
+
Parameters:
+
    +
  • parameters – A data cube with optimal values, e.g. computed by the process fit_curve().

  • +
  • function – The model function. It must take the parameters to fit as array through the first argument +and the independent variable x as the second argument. It is recommended to store the model function as +a user-defined process on the back-end.

  • +
  • dimension – The name of the dimension for predictions.

  • +
  • labels – The labels to predict values for. If no labels are given, predicts values only for no-data +(null) values in the data cube.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube with the predicted values with the provided dimension dimension having as many +labels as provided through labels.

+
+
+
+

See also

+

openeo.org documentation on process “predict_curve”.

+
+
+ +
+
+openeo.processes.predict_random_forest(data, model)[source]
+

Predict values based on a Random Forest model

+
+
Parameters:
+
    +
  • data – An array of numbers.

  • +
  • model – A model object that can be trained with the processes fit_regr_random_forest() +(regression) and fit_class_random_forest() (classification).

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The predicted value. Returns null if any of the given values in the array is a no-data value.

+
+
+
+

See also

+

openeo.org documentation on process “predict_random_forest”.

+
+
+ +
+
+openeo.processes.product(data, ignore_nodata=<object object>)[source]
+

Compute the product by multiplying numbers

+
+
Parameters:
+
    +
  • data – An array of numbers.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not. Ignores them by default. Setting +this flag to false considers no-data values so that null is returned if any value is such a value.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed product of the sequence of numbers.

+
+
+
+

See also

+

openeo.org documentation on process “product”.

+
+
+ +
+
+openeo.processes.quantiles(data, probabilities=<object object>, q=<object object>, ignore_nodata=<object object>)[source]
+

Quantiles

+
+
Parameters:
+
    +
  • data – An array of numbers.

  • +
  • probabilities – Quantiles to calculate. Either a list of probabilities or the number of intervals: * +Provide an array with a sorted list of probabilities in ascending order to calculate quantiles for. The +probabilities must be between 0 and 1 (inclusive). If not sorted in ascending order, an +AscendingProbabilitiesRequired exception is thrown. * Provide an integer to specify the number of +intervals to calculate quantiles for. Calculates q-quantiles with equal-sized intervals.

  • +
  • q – Number of intervals to calculate quantiles for. Calculates q-quantiles with equal-sized +intervals. This parameter has been deprecated. Please use the parameter probabilities instead.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not. Ignores them by default. Setting +this flag to false considers no-data values so that an array with null values is returned if any +element is such a value.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

An array with the computed quantiles. The list has either * as many elements as the given list of +probabilities had or * q-1 elements. If the input array is empty the resulting array is filled with +as many null values as required according to the list above. See the ‘Empty array’ example for an +example.

+
+
+
+

See also

+

openeo.org documentation on process “quantiles”.

+
+
+ +
+
+openeo.processes.rearrange(data, order)[source]
+

Sort an array based on a permutation

+
+
Parameters:
+
    +
  • data – The array to rearrange.

  • +
  • order – The permutation used for rearranging.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The rearranged array.

+
+
+
+

See also

+

openeo.org documentation on process “rearrange”.

+
+
+ +
+
+openeo.processes.reduce_dimension(data, reducer, dimension, context=<object object>)[source]
+

Reduce dimensions

+
+
Parameters:
+
    +
  • data – A data cube.

  • +
  • reducer – A reducer to apply on the specified dimension. A reducer is a single process such as +mean() or a set of processes, which computes a single value for a list of values, see the category +‘reducer’ for such processes.

  • +
  • dimension – The name of the dimension over which to reduce. Fails with a DimensionNotAvailable +exception if the specified dimension does not exist.

  • +
  • context – Additional data to be passed to the reducer.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube with the newly computed values. It is missing the given dimension, the number of +dimensions decreases by one. The dimension properties (name, type, labels, reference system and resolution) +for all other dimensions remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “reduce_dimension”.

+
+
+ +
+
+openeo.processes.reduce_spatial(data, reducer, context=<object object>)[source]
+

Reduce spatial dimensions ‘x’ and ‘y’

+
+
Parameters:
+
    +
  • data – A raster data cube.

  • +
  • reducer – A reducer to apply on the horizontal spatial dimensions. A reducer is a single process such +as mean() or a set of processes, which computes a single value for a list of values, see the category +‘reducer’ for such processes.

  • +
  • context – Additional data to be passed to the reducer.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube with the newly computed values. It is missing the horizontal spatial dimensions, the +number of dimensions decreases by two. The dimension properties (name, type, labels, reference system and +resolution) for all other dimensions remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “reduce_spatial”.

+
+
+ +
+
+openeo.processes.rename_dimension(data, source, target)[source]
+

Rename a dimension

+
+
Parameters:
+
    +
  • data – The data cube.

  • +
  • source – The current name of the dimension. Fails with a DimensionNotAvailable exception if the +specified dimension does not exist.

  • +
  • target – A new name for the dimension. Fails with a DimensionExists exception if a dimension with +the specified name exists.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube with the same dimensions, but the name of one of the dimensions changes. The old name +can not be referred to any longer. The dimension properties (name, type, labels, reference system and +resolution) remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “rename_dimension”.

+
+
+ +
+
+openeo.processes.rename_labels(data, dimension, target, source=<object object>)[source]
+

Rename dimension labels

+
+
Parameters:
+
    +
  • data – The data cube.

  • +
  • dimension – The name of the dimension to rename the labels for.

  • +
  • target – The new names for the labels. If a target dimension label already exists in the data cube, +a LabelExists exception is thrown.

  • +
  • source – The original names of the labels to be renamed to corresponding array elements in the +parameter target. It is allowed to only specify a subset of labels to rename, as long as the target and +source parameter have the same length. The order of the labels doesn’t need to match the order of the +dimension labels in the data cube. By default, the array is empty so that the dimension labels in the data +cube are expected to be enumerated. If the dimension labels are not enumerated and the given array is +empty, the LabelsNotEnumerated exception is thrown. If one of the source dimension labels doesn’t exist, +the LabelNotAvailable exception is thrown.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The data cube with the same dimensions. The dimension properties (name, type, labels, reference +system and resolution) remain unchanged, except that for the given dimension the labels change. The old +labels can not be referred to any longer. The number of labels remains the same.

+
+
+
+

See also

+

openeo.org documentation on process “rename_labels”.

+
+
+ +
+
+openeo.processes.resample_cube_spatial(data, target, method=<object object>)[source]
+

Resample the spatial dimensions to match a target data cube

+
+
Parameters:
+
    +
  • data – A raster data cube.

  • +
  • target – A raster data cube that describes the spatial target resolution.

  • +
  • method – Resampling method to use. The following options are available and are meant to align with +[gdalwarp](https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r): * average: average (mean) +resampling, computes the weighted average of all valid pixels * bilinear: bilinear resampling * cubic: +cubic resampling * cubicspline: cubic spline resampling * lanczos: Lanczos windowed sinc resampling * +max: maximum resampling, selects the maximum value from all valid pixels * med: median resampling, +selects the median value of all valid pixels * min: minimum resampling, selects the minimum value from +all valid pixels * mode: mode resampling, selects the value which appears most often of all the sampled +points * near: nearest neighbour resampling (default) * q1: first quartile resampling, selects the +first quartile value of all valid pixels * q3: third quartile resampling, selects the third quartile +value of all valid pixels * rms: root mean square (quadratic mean) of all valid pixels * sum: compute +the weighted sum of all valid pixels Valid pixels are determined based on the function is_valid().

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A raster data cube with the same dimensions. The dimension properties (name, type, labels, +reference system and resolution) remain unchanged, except for the resolution and dimension labels of the +spatial dimensions.

+
+
+
+

See also

+

openeo.org documentation on process “resample_cube_spatial”.

+
+
+ +
+
+openeo.processes.resample_cube_temporal(data, target, dimension=<object object>, valid_within=<object object>)[source]
+

Resample temporal dimensions to match a target data cube

+
+
Parameters:
+
    +
  • data – A data cube with one or more temporal dimensions.

  • +
  • target – A data cube that describes the temporal target resolution.

  • +
  • dimension – The name of the temporal dimension to resample, which must exist with this name in both +data cubes. If the dimension is not set or is set to null, the process resamples all temporal dimensions +that exist with the same names in both data cubes. The following exceptions may occur: * A dimension is +given, but it does not exist in any of the data cubes: DimensionNotAvailable * A dimension is given, but +one of them is not temporal: DimensionMismatch * No specific dimension name is given and there are no +temporal dimensions with the same name in the data: DimensionMismatch

  • +
  • valid_within – Setting this parameter to a numerical value enables that the process searches for +valid values within the given period of days before and after the target timestamps. Valid values are +determined based on the function is_valid(). For example, the limit of 7 for the target timestamps +2020-01-15 12:00:00 looks for a nearest neighbor after 2020-01-08 12:00:00 and before 2020-01-22 +12:00:00. If no valid value is found within the given period, the value will be set to no-data (null).

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube with the same dimensions and the same dimension properties (name, type, labels, +reference system and resolution) for all non-temporal dimensions. For the temporal dimension, the name and +type remain unchanged, but the dimension labels, resolution and reference system may change.

+
+
+
+

See also

+

openeo.org documentation on process “resample_cube_temporal”.

+
+
+ +
+
+openeo.processes.resample_spatial(data, resolution=<object object>, projection=<object object>, method=<object object>, align=<object object>)[source]
+

Resample and warp the spatial dimensions

+
+
Parameters:
+
    +
  • data – A raster data cube.

  • +
  • resolution – Resamples the data cube to the target resolution, which can be specified either as +separate values for x and y or as a single value for both axes. Specified in the units of the target +projection. Doesn’t change the resolution by default (0).

  • +
  • projection – Warps the data cube to the target projection, specified as [EPSG +code](http://www.epsg-registry.org/) or [WKT2 CRS +string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). By default (null), the projection is +not changed.

  • +
  • method – Resampling method to use. The following options are available and are meant to align with +[gdalwarp](https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r): * average: average (mean) +resampling, computes the weighted average of all valid pixels * bilinear: bilinear resampling * cubic: +cubic resampling * cubicspline: cubic spline resampling * lanczos: Lanczos windowed sinc resampling * +max: maximum resampling, selects the maximum value from all valid pixels * med: median resampling, +selects the median value of all valid pixels * min: minimum resampling, selects the minimum value from +all valid pixels * mode: mode resampling, selects the value which appears most often of all the sampled +points * near: nearest neighbour resampling (default) * q1: first quartile resampling, selects the +first quartile value of all valid pixels * q3: third quartile resampling, selects the third quartile +value of all valid pixels * rms: root mean square (quadratic mean) of all valid pixels * sum: compute +the weighted sum of all valid pixels Valid pixels are determined based on the function is_valid().

  • +
  • align – Specifies to which corner of the spatial extent the new resampled data is aligned.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A raster data cube with values warped onto the new projection. It has the same dimensions and the +same dimension properties (name, type, labels, reference system and resolution) for all non-spatial or +vertical spatial dimensions. For the horizontal spatial dimensions the name and type remain unchanged, but +reference system, labels and resolution may change depending on the given parameters.

+
+
+
+

See also

+

openeo.org documentation on process “resample_spatial”.

+
+
+ +
+
+openeo.processes.round(x, p=<object object>)[source]
+

Round to a specified precision

+
+
Parameters:
+
    +
  • x – A number to round.

  • +
  • p – A positive number specifies the number of digits after the decimal point to round to. A negative +number means rounding to a power of ten, so for example -2 rounds to the nearest hundred. Defaults to +0.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The rounded number.

+
+
+
+

See also

+

openeo.org documentation on process “round”.

+
+
+ +
+
+openeo.processes.run_udf(data, udf, runtime, version=<object object>, context=<object object>)[source]
+

Run a UDF

+
+
Parameters:
+
    +
  • data – The data to be passed to the UDF.

  • +
  • udf – Either source code, an absolute URL or a path to a UDF script.

  • +
  • runtime – A UDF runtime identifier available at the back-end.

  • +
  • version – A UDF runtime version. If set to null, the default runtime version specified for each +runtime is used.

  • +
  • context – Additional data such as configuration options to be passed to the UDF.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The data processed by the UDF. The returned value can be of any data type and is exactly what the +UDF code returns.

+
+
+
+

See also

+

openeo.org documentation on process “run_udf”.

+
+
+ +
+
+openeo.processes.run_udf_externally(data, url, context=<object object>)[source]
+

Run an externally hosted UDF container

+
+
Parameters:
+
    +
  • data – The data to be passed to the UDF.

  • +
  • url – Absolute URL to a remote UDF service.

  • +
  • context – Additional data such as configuration options to be passed to the UDF.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The data processed by the UDF. The returned value can in principle be of any data type, but it +depends on what is returned by the UDF code. Please see the implemented UDF interface for details.

+
+
+
+

See also

+

openeo.org documentation on process “run_udf_externally”.

+
+
+ +
+
+openeo.processes.sar_backscatter(data, coefficient=<object object>, elevation_model=<object object>, mask=<object object>, contributing_area=<object object>, local_incidence_angle=<object object>, ellipsoid_incidence_angle=<object object>, noise_removal=<object object>, options=<object object>)[source]
+

Computes backscatter from SAR input

+
+
Parameters:
+
    +
  • data – The source data cube containing SAR input.

  • +
  • coefficient – Select the radiometric correction coefficient. The following options are available: * +beta0: radar brightness * sigma0-ellipsoid: ground area computed with ellipsoid earth model * +sigma0-terrain: ground area computed with terrain earth model * gamma0-ellipsoid: ground area computed +with ellipsoid earth model in sensor line of sight * gamma0-terrain: ground area computed with terrain +earth model in sensor line of sight (default) * null: non-normalized backscatter

  • +
  • elevation_model – The digital elevation model to use. Set to null (the default) to allow the back- +end to choose, which will improve portability, but reduce reproducibility.

  • +
  • mask – If set to true, a data mask is added to the bands with the name mask. It indicates which +values are valid (1), invalid (0) or contain no-data (null).

  • +
  • contributing_area – If set to true, a DEM-based local contributing area band named +contributing_area is added. The values are given in square meters.

  • +
  • local_incidence_angle – If set to true, a DEM-based local incidence angle band named +local_incidence_angle is added. The values are given in degrees.

  • +
  • ellipsoid_incidence_angle – If set to true, an ellipsoidal incidence angle band named +ellipsoid_incidence_angle is added. The values are given in degrees.

  • +
  • noise_removal – If set to false, no noise removal is applied. Defaults to true, which removes +noise.

  • +
  • options – Proprietary options for the backscatter computations. Specifying proprietary options will +reduce portability.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Backscatter values corresponding to the chosen parametrization. The values are given in linear +scale.

+
+
+
+

See also

+

openeo.org documentation on process “sar_backscatter”.

+
+
+ +
+
+openeo.processes.save_result(data, format, options=<object object>)[source]
+

Save processed data

+
+
Parameters:
+
    +
  • data – The data to deliver in the given file format.

  • +
  • format – The file format to use. It must be one of the values that the server reports as supported +output file formats, which usually correspond to the short GDAL/OGR codes. This parameter is case +insensitive. * If the data cube is empty and the file format can’t store empty data cubes, a +DataCubeEmpty exception is thrown. * If the file format is otherwise not suitable for storing the +underlying data structure, a FormatUnsuitable exception is thrown.

  • +
  • options – The file format parameters to be used to create the file(s). Must correspond to the +parameters that the server reports as supported parameters for the chosen format. The parameter names and +valid values usually correspond to the GDAL/OGR format options.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Always returns true as in case of an error an exception is thrown which aborts the execution of +the process.

+
+
+
+

See also

+

openeo.org documentation on process “save_result”.

+
+
+ +
+
+openeo.processes.sd(data, ignore_nodata=<object object>)[source]
+

Standard deviation

+
+
Parameters:
+
    +
  • data – An array of numbers.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not. Ignores them by default. Setting +this flag to false considers no-data values so that null is returned if any value is such a value.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed sample standard deviation.

+
+
+
+

See also

+

openeo.org documentation on process “sd”.

+
+
+ +
+
+openeo.processes.sgn(x)[source]
+

Signum

+
+
Parameters:
+

x – A number.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed signum value of x.

+
+
+
+

See also

+

openeo.org documentation on process “sgn”.

+
+
+ +
+
+openeo.processes.sin(x)[source]
+

Sine

+
+
Parameters:
+

x – An angle in radians.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed sine of x.

+
+
+
+

See also

+

openeo.org documentation on process “sin”.

+
+
+ +
+
+openeo.processes.sinh(x)[source]
+

Hyperbolic sine

+
+
Parameters:
+

x – An angle in radians.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed hyperbolic sine of x.

+
+
+
+

See also

+

openeo.org documentation on process “sinh”.

+
+
+ +
+
+openeo.processes.sort(data, asc=<object object>, nodata=<object object>)[source]
+

Sort data

+
+
Parameters:
+
    +
  • data – An array with data to sort.

  • +
  • asc – The default sort order is ascending, with smallest values first. To sort in reverse +(descending) order, set this parameter to false.

  • +
  • nodata – Controls the handling of no-data values (null). By default, they are removed. If set to +true, missing values in the data are put last; if set to false, they are put first.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The sorted array.

+
+
+
+

See also

+

openeo.org documentation on process “sort”.

+
+
+ +
+
+openeo.processes.sqrt(x)[source]
+

Square root

+
+
Parameters:
+

x – A number.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed square root.

+
+
+
+

See also

+

openeo.org documentation on process “sqrt”.

+
+
+ +
+
+openeo.processes.subtract(x, y)[source]
+

Subtraction of two numbers

+
+
Parameters:
+
    +
  • x – The minuend.

  • +
  • y – The subtrahend.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed result.

+
+
+
+

See also

+

openeo.org documentation on process “subtract”.

+
+
+ +
+
+openeo.processes.sum(data, ignore_nodata=<object object>)[source]
+

Compute the sum by adding up numbers

+
+
Parameters:
+
    +
  • data – An array of numbers.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not. Ignores them by default. Setting +this flag to false considers no-data values so that null is returned if any value is such a value.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed sum of the sequence of numbers.

+
+
+
+

See also

+

openeo.org documentation on process “sum”.

+
+
+ +
+
+openeo.processes.tan(x)[source]
+

Tangent

+
+
Parameters:
+

x – An angle in radians.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed tangent of x.

+
+
+
+

See also

+

openeo.org documentation on process “tan”.

+
+
+ +
+
+openeo.processes.tanh(x)[source]
+

Hyperbolic tangent

+
+
Parameters:
+

x – An angle in radians.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed hyperbolic tangent of x.

+
+
+
+

See also

+

openeo.org documentation on process “tanh”.

+
+
+ +
+
+openeo.processes.text_begins(data, pattern, case_sensitive=<object object>)[source]
+

Text begins with another text

+
+
Parameters:
+
    +
  • data – Text in which to find something at the beginning.

  • +
  • pattern – Text to find at the beginning of data. Regular expressions are not supported.

  • +
  • case_sensitive – Case sensitive comparison can be disabled by setting this parameter to false.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

true if data begins with pattern, false otherwise.

+
+
+
+

See also

+

openeo.org documentation on process “text_begins”.

+
+
+ +
+
+openeo.processes.text_concat(data, separator=<object object>)[source]
+

Concatenate elements to a single text

+
+
Parameters:
+
    +
  • data – A set of elements. Numbers, boolean values and null values get converted to their (lower case) +string representation. For example: 1 (integer), -1.5 (number), true / false (boolean values)

  • +
  • separator – A separator to put between each of the individual texts. Defaults to an empty string.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A string containing a string representation of all the array elements in the same order, with the +separator between each element.

+
+
+
+

See also

+

openeo.org documentation on process “text_concat”.

+
+
+ +
+
+openeo.processes.text_contains(data, pattern, case_sensitive=<object object>)[source]
+

Text contains another text

+
+
Parameters:
+
    +
  • data – Text in which to find something.

  • +
  • pattern – Text to find in data. Regular expressions are not supported.

  • +
  • case_sensitive – Case sensitive comparison can be disabled by setting this parameter to false.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

true if data contains the pattern, false otherwise.

+
+
+
+

See also

+

openeo.org documentation on process “text_contains”.

+
+
+ +
+
+openeo.processes.text_ends(data, pattern, case_sensitive=<object object>)[source]
+

Text ends with another text

+
+
Parameters:
+
    +
  • data – Text in which to find something at the end.

  • +
  • pattern – Text to find at the end of data. Regular expressions are not supported.

  • +
  • case_sensitive – Case sensitive comparison can be disabled by setting this parameter to false.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

true if data ends with pattern, false otherwise.

+
+
+
+

See also

+

openeo.org documentation on process “text_ends”.

+
+
+ +
+
+openeo.processes.trim_cube(data)[source]
+

Remove dimension labels with no-data values

+
+
Parameters:
+

data – A data cube to trim.

+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A trimmed data cube with the same dimensions. The dimension properties name, type, reference +system and resolution remain unchanged. The number of dimension labels may decrease.

+
+
+
+

See also

+

openeo.org documentation on process “trim_cube”.

+
+
+ +
+
+openeo.processes.unflatten_dimension(data, dimension, target_dimensions, label_separator=<object object>)[source]
+

Split a single dimension into multiple dimensions

+
+
Parameters:
+
    +
  • data – A data cube that is consistently structured so that operation can execute flawlessly (e.g. the +dimension labels need to contain the label_separator exactly 1 time for two target dimensions, 2 times +for three target dimensions etc.).

  • +
  • dimension – The name of the dimension to split.

  • +
  • target_dimensions – The names of the new target dimensions. New dimensions will be created with the +given names and type other (see add_dimension()). Fails with a TargetDimensionExists exception if +any of the dimensions exists. The order of the array defines the order in which the dimensions and +dimension labels are added to the data cube (see the example in the process description).

  • +
  • label_separator – The string that will be used as a separator to split the dimension labels.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A data cube with the new shape. The dimension properties (name, type, labels, reference system and +resolution) for all other dimensions remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “unflatten_dimension”.

+
+
+ +
+
+openeo.processes.variance(data, ignore_nodata=<object object>)[source]
+

Variance

+
+
Parameters:
+
    +
  • data – An array of numbers.

  • +
  • ignore_nodata – Indicates whether no-data values are ignored or not. Ignores them by default. Setting +this flag to false considers no-data values so that null is returned if any value is such a value.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

The computed sample variance.

+
+
+
+

See also

+

openeo.org documentation on process “variance”.

+
+
+ +
+
+openeo.processes.vector_buffer(geometries, distance)[source]
+

Buffer geometries by distance

+
+
Parameters:
+
    +
  • geometries – Geometries to apply the buffer on. Feature properties are preserved.

  • +
  • distance – The distance of the buffer in meters. A positive distance expands the geometries, +resulting in outward buffering (dilation), while a negative distance shrinks the geometries, resulting in +inward buffering (erosion). If the unit of the spatial reference system is not meters, a UnitMismatch +error is thrown. Use vector_reproject() to convert the geometries to a suitable spatial reference +system.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Returns a vector data cube with the computed new geometries of which some may be empty.

+
+
+
+

See also

+

openeo.org documentation on process “vector_buffer”.

+
+
+ +
+
+openeo.processes.vector_reproject(data, projection, dimension=<object object>)[source]
+

Reprojects the geometry dimension

+
+
Parameters:
+
    +
  • data – A vector data cube.

  • +
  • projection – Coordinate reference system to reproject to. Specified as an [EPSG +code](http://www.epsg-registry.org/) or [WKT2 CRS +string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html).

  • +
  • dimension – The name of the geometry dimension to reproject. If no specific dimension is specified, +the filter applies to all geometry dimensions. Fails with a DimensionNotAvailable exception if the +specified dimension does not exist.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

A vector data cube with geometries projected to the new coordinate reference system. The reference +system of the geometry dimension changes, all other dimensions and properties remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “vector_reproject”.

+
+
+ +
+
+openeo.processes.vector_to_random_points(data, geometry_count=<object object>, total_count=<object object>, group=<object object>, seed=<object object>)[source]
+

Sample random points from geometries

+
+
Parameters:
+
    +
  • data – Input geometries for sample extraction.

  • +
  • geometry_count – The maximum number of points to compute per geometry. Points in the input +geometries can be selected only once by the sampling.

  • +
  • total_count – The maximum number of points to compute overall. Throws a CountMismatch exception if +the specified value is less than the provided number of geometries.

  • +
  • group – Specifies whether the sampled points should be grouped by input geometry (default) or be +generated as independent points. * If the sampled points are grouped, the process generates a MultiPoint +per geometry given which keeps the original identifier if present. * Otherwise, each sampled point is +generated as a distinct Point geometry without identifier.

  • +
  • seed – A randomization seed to use for random sampling. If not given or null, no seed is used and +results may differ on subsequent use.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Returns a vector data cube with the sampled points.

+
+
+
+

See also

+

openeo.org documentation on process “vector_to_random_points”.

+
+
+ +
+
+openeo.processes.vector_to_regular_points(data, distance, group=<object object>)[source]
+

Sample regular points from geometries

+
+
Parameters:
+
    +
  • data – Input geometries for sample extraction.

  • +
  • distance – Defines the minimum distance in meters that is required between two samples generated +inside a single geometry. If the unit of the spatial reference system is not meters, a UnitMismatch +error is thrown. Use vector_reproject() to convert the geometries to a suitable spatial reference +system. - For polygons, the distance defines the cell sizes of a regular grid that starts at the +upper-left bound of each polygon. The centroid of each cell is then a sample point. If the centroid is not +enclosed in the polygon, no point is sampled. If no point can be sampled for the geometry at all, the first +coordinate of the geometry is returned as point. - For lines (line strings), the sampling starts with a +point at the first coordinate of the line and then walks along the line and samples a new point each time +the distance to the previous point has been reached again. - For points, the point is returned as +given.

  • +
  • group – Specifies whether the sampled points should be grouped by input geometry (default) or be +generated as independent points. * If the sampled points are grouped, the process generates a MultiPoint +per geometry given which keeps the original identifier if present. * Otherwise, each sampled point is +generated as a distinct Point geometry without identifier.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Returns a vector data cube with the sampled points.

+
+
+
+

See also

+

openeo.org documentation on process “vector_to_regular_points”.

+
+
+ +
+
+openeo.processes.xor(x, y)[source]
+

Logical XOR (exclusive or)

+
+
Parameters:
+
    +
  • x – A boolean value.

  • +
  • y – A boolean value.

  • +
+
+
Return type:
+

ProcessBuilder

+
+
Returns:
+

Boolean result of the logical XOR.

+
+
+
+

See also

+

openeo.org documentation on process “xor”.

+
+
+ +
+
+

ProcessBuilder helper class

+
+
+class openeo.processes.ProcessBuilder(pgnode)[source]
+

The ProcessBuilder class +is a helper class that implements +(much like the openEO process functions) +each openEO process as a method. +On top of that it also adds syntactic sugar to support Python operators as well +(e.g. + is translated to the add process).

+
+

Attention

+

As a normal user, you should never create a +ProcessBuilder instance +directly.

+

You should only interact with this class inside a callback +function/lambda while building a child callback process graph +as discussed at Callback as a callable.

+
+

For example, let’s start from this simple usage snippet +where we want to reduce the temporal dimension +by taking the temporal mean of each timeseries:

+
def my_reducer(data):
+    return data.mean()
+
+cube.reduce_dimension(reducer=my_reducer, dimension="t")
+
+
+

Note that this my_reducer function has a data argument, +which conceptually corresponds to an array of pixel values +(along the temporal dimension). +However, it’s important to understand that the my_reducer function +is actually not evaluated when you execute your process graph +on an openEO back-end, e.g. as a batch job. +Instead, my_reducer is evaluated +while building your process graph client-side +(at the time you execute that cube.reduce_dimension() statement to be precise). +This means that the data argument is actually not a concrete array of EO data, +but some kind of virtual placeholder, +a ProcessBuilder instance, +that keeps track of the operations you intend to do on the EO data.

+

To make that more concrete, it helps to add type hints +which will make it easier to discover what you can do with the argument +(depending on which editor or IDE you are using):

+
from openeo.processes import ProcessBuilder
+
+def my_reducer(data: ProcessBuilder) -> ProcessBuilder:
+    return data.mean()
+
+cube.reduce_dimension(reducer=my_reducer, dimension="t")
+
+
+

Because ProcessBuilder methods +return new ProcessBuilder instances, +and because it supports syntactic sugar to use Python operators on it, +and because openeo.processes functions +also accept and return ProcessBuilder instances, +we can mix methods, functions and operators in the callback function like this:

+
from openeo.processes import ProcessBuilder, cos
+
+def my_reducer(data: ProcessBuilder) -> ProcessBuilder:
+    return cos(data.mean()) + 1.23
+
+cube.reduce_dimension(reducer=my_reducer, dimension="t")
+
+
+

or compactly, using an anonymous lambda expression:

+
from openeo.processes import cos
+
+cube.reduce_dimension(
+    reducer=lambda data: cos(data.mean()) + 1.23,
+    dimension="t"
+)
+
+
+
+ +
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/api.html b/api.html new file mode 100644 index 000000000..aa5dd46ea --- /dev/null +++ b/api.html @@ -0,0 +1,6941 @@ + + + + + + + + API (General) — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

API (General)

+
+

High level Interface

+

The high-level interface tries to provide an opinionated, Pythonic, API +to interact with openEO back-ends. Its aim is to hide some of the details +of using a web service, so the user can produce concise and readable code.

+

Users that want to interact with openEO on a lower level, and have more control, can +use the lower level classes.

+
+
+

openeo

+
+
+openeo.connect(url=None, *, auth_type=None, auth_options=None, session=None, default_timeout=None, auto_validate=True)[source]
+

This method is the entry point to OpenEO. +You typically create one connection object in your script or application +and re-use it for all calls to that backend.

+

If the backend requires authentication, you can pass authentication data directly to this function, +but it could be easier to authenticate as follows:

+
>>> # For basic authentication
+>>> conn = connect(url).authenticate_basic(username="john", password="foo")
+>>> # For OpenID Connect authentication
+>>> conn = connect(url).authenticate_oidc(client_id="myclient")
+
+
+
+
Parameters:
+
    +
  • url (Optional[str]) – The http url of the OpenEO back-end.

  • +
  • auth_type (Optional[str]) – Which authentication to use: None, “basic” or “oidc” (for OpenID Connect)

  • +
  • auth_options (Optional[dict]) – Options/arguments specific to the authentication type

  • +
  • default_timeout (Optional[int]) – default timeout (in seconds) for requests

  • +
  • auto_validate (bool) – toggle to automatically validate process graphs before execution

  • +
+
+
Return type:
+

Connection

+
+
+
+

Added in version 0.24.0: added auto_validate argument

+
+
+ +
+
+

openeo.rest.datacube

+

The main module for creating earth observation processes. It aims to easily build complex process chains, that can +be evaluated by an openEO backend.

+
+
+openeo.rest.datacube.THIS
+

Symbolic reference to the current data cube, to be used as argument in DataCube.process() calls

+
+ +
+
+class openeo.rest.datacube.DataCube(graph, connection=None, metadata=None)[source]
+

Class representing an openEO (raster) data cube.

+

The data cube is represented by its corresponding openeo “process graph” +and this process graph can be “grown” to a desired workflow by calling the appropriate methods.

+
+
+__init__(graph, connection=None, metadata=None)[source]
+
+ +
+
+add(other, reverse=False)[source]
+
+
Return type:
+

DataCube

+
+
+
+

See also

+

openeo.org documentation on process “add”.

+
+
+ +
+
+add_dimension(name, label, type=None)[source]
+

Adds a new named dimension to the data cube. +Afterwards, the dimension can be referenced with the specified name. If a dimension with the specified name exists, +the process fails with a DimensionExists error. The dimension label of the dimension is set to the specified label.

+

This call does not modify the datacube in place, but returns a new datacube with the additional dimension.

+
+
Parameters:
+
    +
  • name (str) – The name of the dimension to add

  • +
  • label (str) – The dimension label.

  • +
  • type (Optional[str]) – Dimension type, allowed values: ‘spatial’, ‘temporal’, ‘bands’, ‘other’, default value is ‘other’

  • +
+
+
Returns:
+

The data cube with a newly added dimension. The new dimension has exactly one dimension label. All other dimensions remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “add_dimension”.

+
+
+ +
+
+aggregate_spatial(geometries, reducer, target_dimension=None, crs=None, context=None)[source]
+

Aggregates statistics for one or more geometries (e.g. zonal statistics for polygons) +over the spatial dimensions.

+
+
Parameters:
+
    +
  • geometries (Union[BaseGeometry, dict, str, Path, Parameter, VectorCube]) –

    The geometries to aggregate in. Can be provided in different ways:

    +
      +
    • a shapely geometry

    • +
    • a GeoJSON-style dictionary,

    • +
    • a public URL to the geometries in a vector format that is supported by the backend +(also see Connection.list_file_formats()), +e.g. GeoJSON, GeoParquet, etc. +A load_url process will automatically be added to the process graph.

    • +
    • a path (str or Path) to a local, client-side GeoJSON file, +which will be loaded automatically to get the geometries as GeoJSON construct.

    • +
    • a VectorCube instance.

    • +
    • a Parameter instance.

    • +
    +

  • +
  • reducer (Union[str, Callable, PGNode]) –

    the “child callback”: +the name of a single openEO process, +or a callback function as discussed in Processes with child “callbacks”, +or a UDF instance.

    +

    The callback should correspond to a process that +receives an array of numerical values +and returns a single numerical value. +For example:

    + +

  • +
  • target_dimension (Optional[str]) – The new dimension name to be used for storing the results.

  • +
  • crs (Union[int, str, None]) –

    The spatial reference system of the provided polygon. +By default, longitude-latitude (EPSG:4326) is assumed. +See openeo.util.normalize_crs() for more details about additional normalization that is applied to this argument.

    +
    +

    Note

    +

    this crs argument is a non-standard/experimental feature, only supported by specific back-ends. +See https://github.com/Open-EO/openeo-processes/issues/235 for details.

    +
    +

  • +
  • context (Optional[dict]) – Additional data to be passed to the reducer process.

  • +
+
+
Return type:
+

VectorCube

+
+
+
+

Changed in version 0.36.0: Support passing a URL as geometries argument, which will be loaded with the load_url process.

+
+
+

Changed in version 0.36.0: Support for passing a backend-side path as geometries argument was removed +(also see Legacy read_vector usage). +Instead, it’s possible to provide a client-side path to a GeoJSON file +(which will be loaded client-side to get the geometries as GeoJSON construct).

+
+
+

See also

+

openeo.org documentation on process “aggregate_spatial”.

+
+
+ +
+
+aggregate_spatial_window(reducer, size, boundary='pad', align='upper-left', context=None)[source]
+

Aggregates statistics over the horizontal spatial dimensions (axes x and y) of the data cube.

+

The pixel grid for the axes x and y is divided into non-overlapping windows with the size +specified in the parameter size. If the number of values for the axes x and y is not a multiple +of the corresponding window size, the behavior specified in the parameters boundary and align +is applied. For each of these windows, the reducer process computes the result.

+
+
Parameters:
+
    +
  • reducer (Union[str, Callable, PGNode]) – the “child callback”: +the name of a single openEO process, +or a callback function as discussed in Processes with child “callbacks”, +or a UDF instance.

  • +
  • size (List[int]) – Window size in pixels along the horizontal spatial dimensions. +The first value corresponds to the x axis, the second value corresponds to the y axis.

  • +
  • boundary (str) –

    Behavior to apply if the number of values for the axes x and y is not a +multiple of the corresponding value in the size parameter. +Options are:

    +
    +
      +
    • pad (default): pad the data cube with the no-data value null to fit the required window size.

    • +
    • trim: trim the data cube to fit the required window size.

    • +
    +
    +

    Use the parameter align to align the data to the desired corner.

    +

  • +
  • align (str) – If the data requires padding or trimming (see parameter boundary), specifies +to which corner of the spatial extent the data is aligned. For example, if the data is +aligned to the upper left, the process pads/trims at the lower-right.

  • +
  • context (Optional[dict]) – Additional data to be passed to the process.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

A data cube with the newly computed values and the same dimensions.

+
+
+
+

See also

+

openeo.org documentation on process “aggregate_spatial_window”.

+
+
+ +
+
+aggregate_temporal(intervals, reducer, labels=None, dimension=None, context=None)[source]
+

Computes a temporal aggregation based on an array of date and/or time intervals.

+

Calendar hierarchies such as year, month, week etc. must be transformed into specific intervals by the clients. For each interval, all data along the dimension will be passed through the reducer. The computed values will be projected to the labels, so the number of labels and the number of intervals need to be equal.

+

If the dimension is not set, the data cube is expected to only have one temporal dimension.

+
+
Parameters:
+
    +
  • intervals (List[list]) – Temporal left-closed intervals so that the start time is contained, but not the end time.

  • +
  • reducer (Union[str, Callable, PGNode]) –

    the “child callback”: +the name of a single openEO process, +or a callback function as discussed in Processes with child “callbacks”, +or a UDF instance.

    +

    The callback should correspond to a process that +receives an array of numerical values +and returns a single numerical value. +For example:

    + +

  • +
  • labels (Optional[List[str]]) – Labels for the intervals. The number of labels and the number of groups need to be equal.

  • +
  • dimension (Optional[str]) – The temporal dimension for aggregation. All data along the dimension will be passed through the specified reducer. If the dimension is not set, the data cube is expected to only have one temporal dimension.

  • +
  • context (Optional[dict]) – Additional data to be passed to the reducer. Not set by default.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

A DataCube containing a result for each time window

+
+
+
+

See also

+

openeo.org documentation on process “aggregate_temporal”.

+
+
+ +
+
+aggregate_temporal_period(period, reducer, dimension=None, context=None)[source]
+

Computes a temporal aggregation based on calendar hierarchies such as years, months or seasons. For other calendar hierarchies aggregate_temporal can be used.

+

For each interval, all data along the dimension will be passed through the reducer.

+

If the dimension is not set or is set to null, the data cube is expected to only have one temporal dimension.

+

The period argument specifies the time intervals to aggregate. The following pre-defined values are available:

+
    +
  • hour: Hour of the day

  • +
  • day: Day of the year

  • +
  • week: Week of the year

  • +
  • dekad: Ten day periods, counted per year with three periods per month (day 1 - 10, 11 - 20 and 21 - end of month). The third dekad of the month can range from 8 to 11 days. For example, the fourth dekad is Feb, 1 - Feb, 10 each year.

  • +
  • month: Month of the year

  • +
  • season: Three month periods of the calendar seasons (December - February, March - May, June - August, September - November).

  • +
  • tropical-season: Six month periods of the tropical seasons (November - April, May - October).

  • +
  • year: Proleptic years

  • +
  • decade: Ten year periods (0-to-9 decade), from a year ending in a 0 to the next year ending in a 9.

  • +
  • decade-ad: Ten year periods (1-to-0 decade) better aligned with the Anno Domini (AD) calendar era, from a year ending in a 1 to the next year ending in a 0.

  • +
+
+
Parameters:
+
    +
  • period (str) – The period of the time intervals to aggregate.

  • +
  • reducer (Union[str, PGNode, Callable]) – A reducer to be applied on all values along the specified dimension. The reducer must be a callable process (or a set of processes) that accepts an array and computes a single return value of the same type as the input values, for example median.

  • +
  • dimension (Optional[str]) – The temporal dimension for aggregation. All data along the dimension will be passed through the specified reducer. If the dimension is not set, the data cube is expected to only have one temporal dimension.

  • +
  • context (Optional[Dict]) – Additional data to be passed to the reducer.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

A data cube with the same dimensions. The dimension properties (name, type, labels, reference system and resolution) remain unchanged.

+
+
+
+

See also

+

openeo.org documentation on process “aggregate_temporal_period”.

+
+
+ +
+
+apply(process, context=None)[source]
+

Applies a unary process (a local operation) to each value of the specified or all dimensions in the data cube.

+
+
Parameters:
+
    +
  • process (Union[str, Callable, UDF, PGNode]) –

    the “child callback”: +the name of a single process, +or a callback function as discussed in Processes with child “callbacks”, +or a UDF instance.

    +

    The callback should correspond to a process that +receives a single numerical value +and returns a single numerical value. +For example:

    + +

  • +
  • context (Optional[dict]) – Additional data to be passed to the process.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

A data cube with the newly computed values. The resolution, cardinality and the number of dimensions are the same as for the original data cube.

+
+
+
+

See also

+

openeo.org documentation on process “apply”.

+
+
+ +
+
+apply_dimension(code=None, runtime=None, process=None, version=None, dimension='t', target_dimension=None, context=None)[source]
+

Applies a process to all pixel values along a dimension of a raster data cube. For example, +if the temporal dimension is specified the process will work on a time series of pixel values.

+

The process to apply is specified by either code and runtime in case of a UDF, or by providing a callback function +in the process argument.

+

The process reduce_dimension also applies a process to pixel values along a dimension, but drops +the dimension afterwards. The process apply applies a process to each pixel value in the data cube.

+

The target dimension is the source dimension if not specified otherwise in the target_dimension parameter. +The pixel values in the target dimension get replaced by the computed pixel values. The name, type and +reference system are preserved.

+

The dimension labels are preserved when the target dimension is the source dimension and the number of +pixel values in the source dimension is equal to the number of values computed by the process. Otherwise, +the dimension labels will be incrementing integers starting from zero, which can be changed using +rename_labels afterwards. The number of labels will be equal to the number of values computed by the process.

+
+
Parameters:
+
    +
  • code (Optional[str]) – [deprecated] UDF code or process identifier (optional)

  • +
  • runtime – [deprecated] UDF runtime to use (optional)

  • +
  • process (Union[str, Callable, UDF, PGNode]) –

    the “child callback”: +the name of a single process, +or a callback function as discussed in Processes with child “callbacks”, +or a UDF instance.

    +

    The callback should correspond to a process that +receives an array of numerical values +and returns an array of numerical values. +For example:

    + +

  • +
  • version (Optional[str]) – [deprecated] Version of the UDF runtime to use

  • +
  • dimension (str) – The name of the source dimension to apply the process on. Fails with a DimensionNotAvailable error if the specified dimension does not exist.

  • +
  • target_dimension (Optional[str]) – The name of the target dimension or null (the default) to use the source dimension +specified in the parameter dimension. By specifying a target dimension, the source dimension is removed. +The target dimension with the specified name and the type other (see add_dimension) is created, if it doesn’t exist yet.

  • +
  • context (Optional[dict]) – Additional data to be passed to the process.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

A datacube with the UDF applied to the given dimension.

+
+
Raises:
+

DimensionNotAvailable

+
+
+
+

Changed in version 0.13.0: arguments code, runtime and version are deprecated in favor of the standard approach +of using a UDF object in the process argument. +See openeo.UDF API and usage changes in version 0.13.0 for more background about the changes.

+
+
+

See also

+

openeo.org documentation on process “apply_dimension”.

+
+
+ +
+
+apply_kernel(kernel, factor=1.0, border=0, replace_invalid=0)[source]
+

Applies a focal operation based on a weighted kernel to each value of the specified dimensions in the data cube.

+

The border parameter determines how the data is extended when the kernel overlaps with the borders. +The following options are available:

+
    +
  • numeric value - fill with a user-defined constant number n: nnnnnn|abcdefgh|nnnnnn (default, with n = 0)

  • +
  • replicate - repeat the value from the pixel at the border: aaaaaa|abcdefgh|hhhhhh

  • +
  • reflect - mirror/reflect from the border: fedcba|abcdefgh|hgfedc

  • +
  • reflect_pixel - mirror/reflect from the center of the pixel at the border: gfedcb|abcdefgh|gfedcb

  • +
  • wrap - repeat/wrap the image: cdefgh|abcdefgh|abcdef

  • +
+
+
Parameters:
+
    +
  • kernel (Union[ndarray, List[List[float]]]) – The kernel to be applied on the data cube. The kernel has to have as many dimensions as the data cube has.

  • +
  • factor – A factor that is multiplied to each value computed by the focal operation. This is basically a shortcut for explicitly multiplying each value by a factor afterwards, which is often required for some kernel-based algorithms such as the Gaussian blur.

  • +
  • border – Determines how the data is extended when the kernel overlaps with the borders. Defaults to fill the border with zeroes.

  • +
  • replace_invalid – This parameter specifies the value to replace non-numerical or infinite numerical values with. By default, those values are replaced with zeroes.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

A data cube with the newly computed values. The resolution, cardinality and the number of dimensions are the same as for the original data cube.

+
+
+
+

See also

+

openeo.org documentation on process “apply_kernel”.

+
+
+ +
+
+apply_neighborhood(process, size, overlap=None, context=None)[source]
+

Applies a focal process to a data cube.

+

A focal process is a process that works on a ‘neighbourhood’ of pixels. The neighbourhood can extend into multiple dimensions, this extent is specified by the size argument. It is not only (part of) the size of the input window, but also the size of the output for a given position of the sliding window. The sliding window moves with multiples of size.

+

An overlap can be specified so that neighbourhoods can have overlapping boundaries. This allows for continuity of the output. The values included in the data cube as overlap can’t be modified by the given process.

+

The neighbourhood size should be kept small enough, to avoid running beyond computational resources, but a too small size will result in a larger number of process invocations, which may slow down processing. Window sizes for spatial dimensions typically are in the range of 64 to 512 pixels, while overlaps of 8 to 32 pixels are common.

+

The process must not add new dimensions, or remove entire dimensions, but the result can have different dimension labels.

+

For the special case of 2D convolution, it is recommended to use apply_kernel().

+
+
Parameters:
+
    +
  • size (List[Dict])

  • +
  • overlap (List[dict])

  • +
  • process (Union[str, PGNode, Callable, UDF]) – a callback function that creates a process graph, see Processes with child “callbacks”

  • +
  • context (Optional[dict]) – Additional data to be passed to the process.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

+
+
+
+

See also

+

openeo.org documentation on process “apply_neighborhood”.

+
+
+ +
+
+apply_polygon(geometries=None, process=None, mask_value=None, context=None, **kwargs)[source]
+

Apply a process to segments of the data cube that are defined by the given polygons. +For each polygon provided, all pixels for which the point at the pixel center intersects +with the polygon (as defined in the Simple Features standard by the OGC) are collected into sub data cubes. +If a pixel is part of multiple of the provided polygons (e.g., when the polygons overlap), +the GeometriesOverlap exception is thrown. +Each sub data cube is passed individually to the given process.

+
+
Parameters:
+
    +
  • geometries (Union[BaseGeometry, dict, str, Path, Parameter, VectorCube]) –

    Can be provided in different ways:

    +
      +
    • a shapely geometry

    • +
    • a GeoJSON-style dictionary,

    • +
    • a public URL to the geometries in a vector format that is supported by the backend +(also see Connection.list_file_formats()), +e.g. GeoJSON, GeoParquet, etc. +A load_url process will automatically be added to the process graph.

    • +
    • a path (str or Path) to a local, client-side GeoJSON file, +which will be loaded automatically to get the geometries as GeoJSON construct.

    • +
    • a VectorCube instance.

    • +
    • a Parameter instance.

    • +
    +

  • +
  • process (Union[str, PGNode, Callable, UDF]) – “child callback” function, see Processes with child “callbacks”

  • +
  • mask_value (Optional[float]) – The value used for pixels outside the polygon.

  • +
  • context (Optional[dict]) – Additional data to be passed to the process.

  • +
+
+
Return type:
+

DataCube

+
+
+
+

Warning

+

experimental process: not generally supported, API subject to change.

+
+
+

Changed in version 0.32.0: Argument polygons was renamed to geometries. +While deprecated, the old name polygons is still supported +as keyword argument for backwards compatibility.

+
+
+

Changed in version 0.36.0: Support passing a URL as geometries argument, which will be loaded with the load_url process.

+
+
+

Changed in version 0.36.0: Support for passing a backend-side path as geometries argument was removed +(also see Legacy read_vector usage). +Instead, it’s possible to provide a client-side path to a GeoJSON file +(which will be loaded client-side to get the geometries as GeoJSON construct).

+
+
+

See also

+

openeo.org documentation on process “apply_polygon”.

+
+
+ +
+
+ard_normalized_radar_backscatter(elevation_model=None, contributing_area=False, ellipsoid_incidence_angle=False, noise_removal=True)[source]
+

Computes CARD4L compliant backscatter (gamma0) from SAR input. +This method is a variant of sar_backscatter(), +with restricted parameters to generate backscatter according to CARD4L specifications.

+

Note that backscatter computation may require instrument specific metadata that is tightly coupled to the original SAR products. +As a result, this process may only work in combination with loading data from specific collections, not with general data cubes.

+
+
Parameters:
+
    +
  • elevation_model (str) – The digital elevation model to use. Set to None (the default) to allow the back-end to choose, which will improve portability, but reduce reproducibility.

  • +
  • contributing_area – If set to true, a DEM-based local contributing area band named contributing_area +is added. The values are given in square meters.

  • +
  • ellipsoid_incidence_angle (bool) – If set to True, an ellipsoidal incidence angle band named ellipsoid_incidence_angle is added. The values are given in degrees.

  • +
  • noise_removal (bool) – If set to false, no noise removal is applied. Defaults to True, which removes noise.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

Backscatter values expressed as gamma0. The data returned is CARD4L compliant and contains metadata. By default, the backscatter values are given in linear scale.

+
+
+
+

See also

+

openeo.org documentation on process “ard_normalized_radar_backscatter”.

+
+
+ +
+
+ard_surface_reflectance(atmospheric_correction_method, cloud_detection_method, elevation_model=None, atmospheric_correction_options=None, cloud_detection_options=None)[source]
+

Computes CARD4L compliant surface reflectance values from optical input.

+
+
Parameters:
+
    +
  • atmospheric_correction_method (str) – The atmospheric correction method to use.

  • +
  • cloud_detection_method (str) – The cloud detection method to use.

  • +
  • elevation_model (str) – The digital elevation model to use, leave empty to allow the back-end to make a suitable choice.

  • +
  • atmospheric_correction_options (dict) – Proprietary options for the atmospheric correction method.

  • +
  • cloud_detection_options (dict) – Proprietary options for the cloud detection method.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

Data cube containing bottom of atmosphere reflectances with atmospheric disturbances like clouds and cloud shadows removed. The data returned is CARD4L compliant and contains metadata.

+
+
+
+

See also

+

openeo.org documentation on process “ard_surface_reflectance”.

+
+
+ +
+
+atmospheric_correction(method=None, elevation_model=None, options=None)[source]
+

Applies an atmospheric correction that converts top of atmosphere reflectance values into bottom of atmosphere/top of canopy reflectance values.

+

Note that multiple atmospheric methods exist, but may not be supported by all backends. The method parameter gives +you the option of requiring a specific method, but this may result in an error if the backend does not support it.

+
+
Parameters:
+
    +
  • method (str) – The atmospheric correction method to use. To get reproducible results, you have to set a specific method. Set to null to allow the back-end to choose, which will improve portability, but reduce reproducibility as you may get different results if you run the processes multiple times.

  • +
  • elevation_model (str) – The digital elevation model to use, leave empty to allow the back-end to make a suitable choice.

  • +
  • options (dict) – Proprietary options for the atmospheric correction method.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

datacube with bottom of atmosphere reflectances

+
+
+
+

See also

+

openeo.org documentation on process “atmospheric_correction”.

+
+
+ +
+
+band(band)[source]
+

Select a single band from the data cube.

+
+
Parameters:
+

band (Union[str, int]) – band name, band common name or band index.

+
+
Return type:
+

DataCube

+
+
Returns:
+

a DataCube instance

+
+
+
+ +
+
+band_filter(bands)
+
+
Return type:
+

DataCube

+
+
+
+

Deprecated since version 0.1.0: Usage of this legacy method is deprecated. Use +filter_bands() instead.

+
+
+ +
+
+chunk_polygon(chunks, process, mask_value=None, context=None)[source]
+
+
Return type:
+

DataCube

+
+
+
+

Deprecated since version 0.26.0: Use apply_polygon().

+
+
+ +
+
+count_time()[source]
+

Counts the number of images with a valid mask in a time series for all bands of the input dataset.

+
+
Return type:
+

DataCube

+
+
Returns:
+

a DataCube instance

+
+
+
+

See also

+

openeo.org documentation on process “count”.

+
+
+ +
+
+classmethod create_collection(cls, collection_id, connection=None, spatial_extent=None, temporal_extent=None, bands=None, fetch_metadata=True, properties=None, max_cloud_cover=None)
+
+
Return type:
+

DataCube

+
+
+
+

Deprecated since version 0.4.6: Usage of this legacy class method is deprecated. Use +load_collection() instead.

+
+
+ +
+
+create_job(out_format=None, *, title=None, description=None, plan=None, budget=None, additional=None, job_options=None, validate=None, auto_add_save_result=True, **format_options)[source]
+

Sends the datacube’s process graph as a batch job to the back-end +and returns a BatchJob instance.

+

Note that the batch job will just be created at the back-end, +it still needs to be started and tracked explicitly. +Use execute_batch() instead to have the openEO Python client take care of that job management.

+
+
Parameters:
+
    +
  • out_format (Optional[str]) – output file format.

  • +
  • title (Optional[str]) – job title

  • +
  • description (Optional[str]) – job description

  • +
  • plan (Optional[str]) – The billing plan to process and charge the job with

  • +
  • budget (Optional[float]) – Maximum budget to be spent on executing the job. +Note that some backends do not honor this limit.

  • +
  • additional (Optional[dict]) – additional (top-level) properties to set in the request body

  • +
  • job_options (Optional[dict]) – dictionary of job options to pass to the backend +(under top-level property “job_options”)

  • +
  • validate (Optional[bool]) – Optional toggle to enable/prevent validation of the process graphs before execution +(overruling the connection’s auto_validate setting).

  • +
  • auto_add_save_result (bool) – Automatically add a save_result node to the process graph if there is none yet.

  • +
+
+
Return type:
+

BatchJob

+
+
Returns:
+

Created job.

+
+
+
+

Added in version 0.32.0: Added auto_add_save_result option

+
+
+

Added in version 0.36.0: Added additional argument.

+
+
+ +
+
+dimension_labels(dimension)[source]
+

Gives all labels for a dimension in the data cube. The labels have the same order as in the data cube.

+
+
Parameters:
+

dimension (str) – The name of the dimension to get the labels for.

+
+
Return type:
+

DataCube

+
+
+
+

See also

+

openeo.org documentation on process “dimension_labels”.

+
+
+ +
+
+divide(other, reverse=False)[source]
+
+
Return type:
+

DataCube

+
+
+
+

See also

+

openeo.org documentation on process “divide”.

+
+
+ +
+
+download(outputfile=None, format=None, options=None, *, validate=None, auto_add_save_result=True, additional=None, job_options=None)[source]
+

Execute synchronously and download the raster data cube, e.g. as GeoTIFF.

+

If outputfile is provided, the result is stored on disk locally, otherwise, a bytes object is returned. +The bytes object can be passed on to a suitable decoder for decoding.

+
+
Parameters:
+
    +
  • outputfile (Union[str, Path, None]) – Optional, an output file if the result needs to be stored on disk.

  • +
  • format (Optional[str]) – Optional, an output format supported by the backend.

  • +
  • options (Optional[dict]) – Optional, file format options

  • +
  • validate (Optional[bool]) – Optional toggle to enable/prevent validation of the process graphs before execution +(overruling the connection’s auto_validate setting).

  • +
  • auto_add_save_result (bool) – Automatically add a save_result node to the process graph if there is none yet.

  • +
  • additional (Optional[dict]) – additional (top-level) properties to set in the request body

  • +
  • job_options (Optional[dict]) – dictionary of job options to pass to the backend +(under top-level property “job_options”)

  • +
+
+
Return type:
+

Optional[bytes]

+
+
Returns:
+

None if the result is stored to disk, or a bytes object returned by the backend.

+
+
+
+

Changed in version 0.32.0: Added auto_add_save_result option

+
+
+

Added in version 0.36.0: Added arguments additional and job_options.

+
+
+ +
+
+drop_dimension(name)[source]
+

Drops a dimension from the data cube. +Dropping a dimension only works on dimensions with a single dimension label left, otherwise the process fails +with a DimensionLabelCountMismatch exception. Dimension values can be reduced to a single value with a filter +such as filter_bands or the reduce_dimension process. If a dimension with the specified name does not exist, +the process fails with a DimensionNotAvailable exception.

+
+
Parameters:
+

name (str) – The name of the dimension to drop

+
+
Returns:
+

The data cube with the given dimension dropped.

+
+
+
+

See also

+

openeo.org documentation on process “drop_dimension”.

+
+
+ +
+
+execute(*, validate=None, auto_decode=True)[source]
+

Execute a process graph synchronously and return the result. If the result is a JSON object, it will be parsed.

+
+
Parameters:
+
    +
  • validate (Optional[bool]) – Optional toggle to enable/prevent validation of the process graphs before execution +(overruling the connection’s auto_validate setting).

  • +
  • auto_decode (bool) – Boolean flag to enable/disable automatic JSON decoding of the response. Defaults to True.

  • +
+
+
Return type:
+

Union[dict, Response]

+
+
Returns:
+

parsed JSON response as a dict if auto_decode is True, otherwise response object

+
+
+
+ +
+
+execute_batch(outputfile=None, out_format=None, *, title=None, description=None, plan=None, budget=None, print=<built-in function print>, max_poll_interval=60, connection_retry_interval=30, additional=None, job_options=None, validate=None, auto_add_save_result=True, **format_options)[source]
+

Evaluate the process graph by creating a batch job, and retrieving the results when it is finished. +This method is mostly recommended if the batch job is expected to run in a reasonable amount of time.

+

For very long-running jobs, you probably do not want to keep the client running.

+
+
Parameters:
+
    +
  • outputfile (Union[str, Path, None]) – The path of a file to which a result can be written

  • +
  • out_format (Optional[str]) – (optional) File format to use for the job result.

  • +
  • additional (Optional[dict]) – additional (top-level) properties to set in the request body

  • +
  • job_options (Optional[dict]) – dictionary of job options to pass to the backend +(under top-level property “job_options”)

  • +
  • validate (Optional[bool]) – Optional toggle to enable/prevent validation of the process graphs before execution +(overruling the connection’s auto_validate setting).

  • +
  • auto_add_save_result (bool) – Automatically add a save_result node to the process graph if there is none yet.

  • +
+
+
Return type:
+

BatchJob

+
+
+
+

Changed in version 0.32.0: Added auto_add_save_result option

+
+
+

Added in version 0.36.0: Added argument additional.

+
+
+ +
+
+static execute_local_udf(udf, datacube=None, fmt='netcdf')[source]
+
+

Deprecated since version 0.7.0: Use openeo.udf.run_code.execute_local_udf() instead

+
+
+ +
+
+filter_bands(bands)[source]
+

Filter the data cube by the given bands

+
+
Parameters:
+

bands (Union[List[Union[str, int]], str]) – list of band names, common names or band indices. Single band name can also be given as string.

+
+
Return type:
+

DataCube

+
+
Returns:
+

a DataCube instance

+
+
+
+

See also

+

openeo.org documentation on process “filter_bands”.

+
+
+ +
+
+filter_bbox(*args, west=None, south=None, east=None, north=None, crs=None, base=None, height=None, bbox=None)[source]
+

Limits the data cube to the specified bounding box.

+

The bounding box can be specified in multiple ways.

+
+
    +
  • With keyword arguments:

    +
    >>> cube.filter_bbox(west=3, south=51, east=4, north=52, crs=4326)
    +
    +
    +
  • +
  • With a (west, south, east, north) list or tuple +(note that EPSG:4326 is the default CRS, so it’s not necessary to specify it explicitly):

    +
    >>> cube.filter_bbox([3, 51, 4, 52])
    +>>> cube.filter_bbox(bbox=[3, 51, 4, 52])
    +
    +
    +
  • +
  • With a bbox dictionary:

    +
    >>> bbox = {"west": 3, "south": 51, "east": 4, "north": 52, "crs": 4326}
    +>>> cube.filter_bbox(bbox)
    +>>> cube.filter_bbox(bbox=bbox)
    +>>> cube.filter_bbox(**bbox)
    +
    +
    +
  • +
  • With a shapely geometry (of which the bounding box will be used):

    +
    >>> cube.filter_bbox(geometry)
    +>>> cube.filter_bbox(bbox=geometry)
    +
    +
    +
  • +
  • Passing a parameter:

    +
    >>> bbox_param = Parameter(name="my_bbox", schema="object")
    +>>> cube.filter_bbox(bbox_param)
    +>>> cube.filter_bbox(bbox=bbox_param)
    +
    +
    +
  • +
  • With a CRS other than EPSG 4326:

    +
    >>> cube.filter_bbox(
    +... west=652000, east=672000, south=5161000, north=5181000,
    +... crs=32632
    +... )
    +
    +
    +
  • +
  • Deprecated: positional arguments are also supported, +but follow a non-standard order for legacy reasons:

    +
    >>> west, east, north, south = 3, 4, 52, 51
    +>>> cube.filter_bbox(west, east, north, south)
    +
    +
    +
  • +
+
+
+
Parameters:
+

crs (Union[int, str, None]) – value describing the coordinate reference system. +Typically just an int (interpreted as EPSG code, e.g. 4326) +or a string (handled as authority string, e.g. "EPSG:4326"). +See openeo.util.normalize_crs() for more details about additional normalization that is applied to this argument.

+
+
Return type:
+

DataCube

+
+
+
+

See also

+

openeo.org documentation on process “filter_bbox”.

+
+
+ +
+
+filter_labels(condition, dimension, context=None)[source]
+

Filters the dimension labels in the data cube for the given dimension. +Only the dimension labels that match the specified condition are preserved, +all other labels with their corresponding data get removed.

+
+
Parameters:
+
    +
  • condition (Union[PGNode, Callable]) – the “child callback” which will be given a single label value (number or string) +and returns a boolean expressing if the label should be preserved. +Also see Processes with child “callbacks”.

  • +
  • dimension (str) – The name of the dimension to filter on.

  • +
+
+
Return type:
+

DataCube

+
+
+
+

Added in version 0.27.0.

+
+
+

See also

+

openeo.org documentation on process “filter_labels”.

+
+
+ +
+
+filter_spatial(geometries)[source]
+

Limits the data cube over the spatial dimensions to the specified geometries.

+
+
    +
  • For polygons, the filter retains a pixel in the data cube if the point at the pixel center intersects with +at least one of the polygons (as defined in the Simple Features standard by the OGC).

  • +
  • For points, the process considers the closest pixel center.

  • +
  • For lines (line strings), the process considers all the pixels whose centers are closest to at least one +point on the line.

  • +
+
+

More specifically, pixels outside of the bounding box of the given geometry will not be available after filtering. +All pixels inside the bounding box that are not retained will be set to null (no data).

+
+
Parameters:
+

geometries (Union[BaseGeometry, dict, str, Path, Parameter, VectorCube]) –

One or more geometries used for filtering. Can be provided in different ways:

+
    +
  • a shapely geometry

  • +
  • a GeoJSON-style dictionary,

  • +
  • a public URL to the geometries in a vector format that is supported by the backend +(also see Connection.list_file_formats()), +e.g. GeoJSON, GeoParquet, etc. +A load_url process will automatically be added to the process graph.

  • +
  • a path (str or Path) to a local, client-side GeoJSON file, +which will be loaded automatically to get the geometries as GeoJSON construct.

  • +
  • a VectorCube instance.

  • +
  • a Parameter instance.

  • +
+

+
+
Return type:
+

DataCube

+
+
Returns:
+

A data cube restricted to the specified geometries. The dimensions and dimension properties (name, +type, labels, reference system and resolution) remain unchanged, except that the spatial dimensions have less +(or the same) dimension labels.

+
+
+
+

Changed in version 0.36.0: Support passing a URL as geometries argument, which will be loaded with the load_url process.

+
+
+

Changed in version 0.36.0: Support for passing a backend-side path as geometries argument was removed +(also see Legacy read_vector usage). +Instead, it’s possible to provide a client-side path to a GeoJSON file +(which will be loaded client-side to get the geometries as GeoJSON construct).

+
+
+

See also

+

openeo.org documentation on process “filter_spatial”.

+
+
+ +
+
+filter_temporal(*args, start_date=None, end_date=None, extent=None)[source]
+

Limit the DataCube to a certain date range, which can be specified in several ways:

+
>>> cube.filter_temporal("2019-07-01", "2019-08-01")
+>>> cube.filter_temporal(["2019-07-01", "2019-08-01"])
+>>> cube.filter_temporal(extent=["2019-07-01", "2019-08-01"])
+>>> cube.filter_temporal(start_date="2019-07-01", end_date="2019-08-01")
+
+
+

See Filter on temporal extent for more details on temporal extent handling and shorthand notation.

+
+
Parameters:
+
    +
  • start_date (Union[str, date, Parameter, PGNode, ProcessBuilderBase, None]) – start date of the filter (inclusive), as a string or date object

  • +
  • end_date (Union[str, date, Parameter, PGNode, ProcessBuilderBase, None]) – end date of the filter (exclusive), as a string or date object

  • +
  • extent (Union[Sequence[Union[str, date, Parameter, PGNode, ProcessBuilderBase, None]], Parameter, str, None]) – temporal extent. +Typically, specified as a two-item list or tuple containing start and end date.

  • +
+
+
Return type:
+

DataCube

+
+
+
+

Changed in version 0.23.0: Arguments start_date, end_date and extent: +add support for year/month shorthand notation as discussed at Year/month shorthand notation.

+
+
+

See also

+

openeo.org documentation on process “filter_temporal”.

+
+
+ +
+
+fit_curve(parameters, function, dimension)[source]
+

Use non-linear least squares to fit a model function y = f(x, parameters) to data.

+

The process throws an InvalidValues exception if invalid values are encountered. +Valid values are finite numbers (see also is_valid()).

+
+

Warning

+

experimental process: not generally supported, API subject to change. +https://github.com/Open-EO/openeo-processes/pull/240

+
+
+
Parameters:
+
+
+
+
+

See also

+

openeo.org documentation on process “fit_curve”.

+
+
+ +
+
+flat_graph()
+

Get the process graph in internal flat dict representation. +Return type: Dict[str, dict]

+
+

Warning

+

This method is mainly intended for internal use. +It is not recommended for general use and is subject to change.

+

Instead, it is recommended to use +to_json() or print_json() +to obtain a standardized, interoperable JSON representation of the process graph. +See Export a process graph for more information.

+
+
+ +
+
+flatten_dimensions(dimensions, target_dimension, label_separator=None)[source]
+

Combines multiple given dimensions into a single dimension by flattening the values +and merging the dimension labels with the given label_separator. Non-string dimension labels will +be converted to strings. This process is the opposite of the process unflatten_dimension() +but executing both processes subsequently doesn’t necessarily create a data cube that +is equal to the original data cube.

+
+
Parameters:
+
    +
  • dimensions (List[str]) – The names of the dimensions to combine.

  • +
  • target_dimension (str) – The name of a target dimension with a single dimension label to replace.

  • +
  • label_separator (Optional[str]) – The string that will be used as a separator for the concatenated dimension labels.

  • +
+
+
Returns:
+

A data cube with the new shape.

+
+
+
+

Warning

+

experimental process: not generally supported, API subject to change.

+
+
+

Added in version 0.10.0.

+
+
+

See also

+

openeo.org documentation on process “flatten_dimensions”.

+
+
+ +
+
+graph_add_node(process_id, arguments=None, metadata=None, namespace=None, **kwargs)
+
+
Return type:
+

DataCube

+
+
+
+

Deprecated since version 0.1.1: Usage of this legacy method is deprecated. Use +process() instead.

+
+
+ +
+
+linear_scale_range(input_min, input_max, output_min, output_max)[source]
+

Performs a linear transformation between the input and output range.

+

The given number in x is clipped to the bounds specified in inputMin and inputMax so that the underlying formula

+
+

((x - inputMin) / (inputMax - inputMin)) * (outputMax - outputMin) + outputMin

+

never returns any value lower than outputMin or greater than outputMax.

+
+

Potential use cases include scaling values to the 8-bit range (0 - 255) often used for numeric representation of +values in one of the channels of the RGB colour model or calculating percentages (0 - 100).

+

The no-data value null is passed through and therefore gets propagated.

+
+
Parameters:
+
    +
  • input_min – Minimum input value

  • +
  • input_max – Maximum input value

  • +
  • output_min – Minimum value of the desired output range.

  • +
  • output_max – Maximum value of the desired output range.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

a DataCube instance

+
+
+
+

See also

+

openeo.org documentation on process “linear_scale_range”.

+
+
+ +
+
+ln()[source]
+
+
Return type:
+

DataCube

+
+
+
+

See also

+

openeo.org documentation on process “ln”.

+
+
+ +
+
+classmethod load_collection(collection_id, connection=None, spatial_extent=None, temporal_extent=None, bands=None, fetch_metadata=True, properties=None, max_cloud_cover=None)[source]
+

Create a new Raster Data cube.

+
+
Parameters:
+
    +
  • collection_id (Union[str, Parameter]) – image collection identifier

  • +
  • connection (Optional[Connection]) – The backend connection to use. +Can be None to work without connection and collection metadata.

  • +
  • spatial_extent (Union[Dict[str, float], Parameter, None]) – limit data to specified bounding box or polygons

  • +
  • temporal_extent (Union[Sequence[Union[str, date, Parameter, PGNode, ProcessBuilderBase, None]], Parameter, str, None]) – limit data to specified temporal interval. +Typically, just a two-item list or tuple containing start and end date. +See Filter on temporal extent for more details on temporal extent handling and shorthand notation.

  • +
  • bands (Union[None, List[str], Parameter]) – only add the specified bands.

  • +
  • properties (Union[None, Dict[str, Union[str, PGNode, Callable]], List[CollectionProperty], CollectionProperty]) – limit data by metadata property predicates. +See collection_property() for easy construction of such predicates.

  • +
  • max_cloud_cover (Optional[float]) – shortcut to set maximum cloud cover (“eo:cloud_cover” collection property)

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

new DataCube containing the collection

+
+
+
+

Changed in version 0.13.0: added the max_cloud_cover argument.

+
+
+

Changed in version 0.23.0: Argument temporal_extent: add support for year/month shorthand notation +as discussed at Year/month shorthand notation.

+
+
+

Changed in version 0.26.0: Add collection_property() support to properties argument.

+
+
+

See also

+

openeo.org documentation on process “load_collection”.

+
+
+ +
+
+classmethod load_disk_collection(connection, file_format, glob_pattern, **options)[source]
+

Loads image data from disk as a DataCube. +This is backed by a non-standard process (‘load_disk_data’). This will eventually be replaced by standard options such as +openeo.rest.connection.Connection.load_stac() or https://processes.openeo.org/#load_uploaded_files

+
+
Parameters:
+
    +
  • connection (Connection) – The connection to use to connect with the backend.

  • +
  • file_format (str) – the file format, e.g. ‘GTiff’

  • +
  • glob_pattern (str) – a glob pattern that matches the files to load from disk

  • +
  • options – options specific to the file format

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

the data as a DataCube

+
+
+
+

Deprecated since version 0.25.0: Depends on non-standard process, replace with +openeo.rest.connection.Connection.load_stac() where +possible.

+
+
+ +
+
+classmethod load_stac(url, spatial_extent=None, temporal_extent=None, bands=None, properties=None, connection=None)[source]
+

Loads data from a static STAC catalog or a STAC API Collection and returns the data as a processable DataCube. +A batch job result can be loaded by providing a reference to it.

+

If supported by the underlying metadata and file format, the data that is added to the data cube can be +restricted with the parameters spatial_extent, temporal_extent and bands. +If no data is available for the given extents, a NoDataAvailable error is thrown.

+

Remarks:

+
    +
  • The bands (and all dimensions that specify nominal dimension labels) are expected to be ordered as +specified in the metadata if the bands parameter is set to null.

  • +
  • If no additional parameter is specified this would imply that the whole data set is expected to be loaded. +Due to the large size of many data sets, this is not recommended and may be optimized by back-ends to only +load the data that is actually required after evaluating subsequent processes such as filters. +This means that the values should be processed only after the data has been limited to the required extent +and as a consequence also to a manageable size.

  • +
+
+
Parameters:
+
    +
  • url (str) –

    The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) +or a specific STAC API Collection that allows filtering items and downloading assets. +This includes batch job results, which themselves are compliant with STAC. +For external URLs, authentication details such as API keys or tokens may need to be included in the URL.

    +

    Batch job results can be specified in two ways:

    +
      +
    • For Batch job results at the same back-end, a URL pointing to the corresponding batch job results +endpoint should be provided. The URL usually ends with /jobs/{id}/results and {id} +is the corresponding batch job ID.

    • +
    • For external results, a signed URL must be provided. Not all back-ends support signed URLs, +which are provided as a link with the link relation canonical in the batch job result metadata.

    • +
    +

  • +
  • spatial_extent (Union[Dict[str, float], Parameter, None]) –

    Limits the data to load to the specified bounding box or polygons.

    +

    For raster data, the process loads the pixel into the data cube if the point at the pixel center intersects +with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).

    +

    For vector data, the process loads the geometry into the data cube if the geometry is fully within the +bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). +Empty geometries may only be in the data cube if no spatial extent has been provided.

    +

    The GeoJSON can be one of the following feature types:

    +
      +
    • A Polygon or MultiPolygon geometry,

    • +
    • a Feature with a Polygon or MultiPolygon geometry, or

    • +
    • a FeatureCollection containing at least one Feature with Polygon or MultiPolygon geometries.

    • +
    +

    Set this parameter to None to set no limit for the spatial extent. +Be careful with this when loading large datasets. It is recommended to use this parameter instead of +using filter_bbox() or filter_spatial() directly after loading unbounded data.

    +

  • +
  • temporal_extent (Union[Sequence[Union[str, date, Parameter, PGNode, ProcessBuilderBase, None]], Parameter, str, None]) –

    Limits the data to load to the specified left-closed temporal interval. +Applies to all temporal dimensions. +The interval has to be specified as an array with exactly two elements:

    +
      +
    1. The first element is the start of the temporal interval. +The specified instance in time is included in the interval.

    2. +
    3. The second element is the end of the temporal interval. +The specified instance in time is excluded from the interval.

    4. +
    +

    The second element must always be greater/later than the first element. +Otherwise, a TemporalExtentEmpty exception is thrown.

    +

    Also supports open intervals by setting one of the boundaries to None, but never both.

    +

    Set this parameter to None to set no limit for the temporal extent. +Be careful with this when loading large datasets. It is recommended to use this parameter instead of +using filter_temporal() directly after loading unbounded data.

    +

  • +
  • bands (Optional[List[str]]) –

    Only adds the specified bands into the data cube so that bands that don’t match the list +of band names are not available. Applies to all dimensions of type bands.

    +

    Either the unique band name (metadata field name in bands) or one of the common band names +(metadata field common_name in bands) can be specified. +If the unique band name and the common name conflict, the unique band name has a higher priority.

    +

    The order of the specified array defines the order of the bands in the data cube. +If multiple bands match a common name, all matched bands are included in the original order.

    +

    It is recommended to use this parameter instead of using filter_bands() directly after loading unbounded data.

    +

  • +
  • properties (Optional[Dict[str, Union[str, PGNode, Callable]]]) –

    Limits the data by metadata properties to include only data in the data cube which +all given conditions return True for (AND operation).

    +

    Specify key-value-pairs with the key being the name of the metadata property, +which can be retrieved with the openEO Data Discovery for Collections. +The value must be a condition (user-defined process) to be evaluated against a STAC API. +This parameter is not supported for static STAC.

    +

  • +
  • connection (Optional[Connection]) – The connection to use to connect with the backend.

  • +
+
+
Return type:
+

DataCube

+
+
+
+

Added in version 0.33.0.

+
+
+ +
+
+log10()[source]
+
+
Return type:
+

DataCube

+
+
+
+

See also

+

openeo.org documentation on process “log”.

+
+
+ +
+
+log2()[source]
+
+
Return type:
+

DataCube

+
+
+
+

See also

+

openeo.org documentation on process “log”.

+
+
+ +
+
+logarithm(base)[source]
+
+
Return type:
+

DataCube

+
+
+
+

See also

+

openeo.org documentation on process “log”.

+
+
+ +
+
+logical_and(other)[source]
+

Apply element-wise logical and operation

+
+
Parameters:
+

other (DataCube)

+
+
Return type:
+

DataCube

+
+
Returns:
+

logical_and(this, other)

+
+
+
+

See also

+

openeo.org documentation on process “and”.

+
+
+ +
+
+logical_or(other)[source]
+

Apply element-wise logical or operation

+
+
Parameters:
+

other (DataCube)

+
+
Return type:
+

DataCube

+
+
Returns:
+

logical_or(this, other)

+
+
+
+

See also

+

openeo.org documentation on process “or”.

+
+
+ +
+
+mask(mask=None, replacement=None)[source]
+

Applies a mask to a raster data cube. To apply a vector mask use mask_polygon.

+

A mask is a raster data cube for which corresponding pixels among data and mask +are compared and those pixels in data are replaced whose pixels in mask are non-zero +(for numbers) or true (for boolean values). +The pixel values are replaced with the value specified for replacement, +which defaults to null (no data).

+
+
Parameters:
+
    +
  • mask (DataCube) – the raster mask

  • +
  • replacement – the value to replace the masked pixels with

  • +
+
+
Return type:
+

DataCube

+
+
+
+

See also

+

openeo.org documentation on process “mask”.

+
+
+ +
+
+mask_polygon(mask, srs=None, replacement=None, inside=None)[source]
+

Applies a polygon mask to a raster data cube. To apply a raster mask use mask.

+

All pixels for which the point at the pixel center does not intersect with any +polygon (as defined in the Simple Features standard by the OGC) are replaced. +This behaviour can be inverted by setting the parameter inside to true.

+

The pixel values are replaced with the value specified for replacement, +which defaults to no data.

+
+
Parameters:
+
    +
  • mask (Union[BaseGeometry, dict, str, Path, Parameter, VectorCube]) –

    The geometry to mask with. Can be provided in different ways:

    +
      +
    • a shapely geometry

    • +
    • a GeoJSON-style dictionary,

    • +
    • a public URL to the geometries in a vector format that is supported by the backend +(also see Connection.list_file_formats()), +e.g. GeoJSON, GeoParquet, etc. +A load_url process will automatically be added to the process graph.

    • +
    • a path (str or Path) to a local, client-side GeoJSON file, +which will be loaded automatically to get the geometries as GeoJSON construct.

    • +
    • a VectorCube instance.

    • +
    • a Parameter instance.

    • +
    +

  • +
  • srs (str) –

    The spatial reference system of the provided polygon. +By default longitude-latitude (EPSG:4326) is assumed.

    +
    +

    Note

    +

    this srs argument is a non-standard/experimental feature, only supported by specific back-ends. +See https://github.com/Open-EO/openeo-processes/issues/235 for details.

    +
    +

  • +
  • replacement – the value to replace the masked pixels with

  • +
+
+
Return type:
+

DataCube

+
+
+
+

Changed in version 0.36.0: Support passing a URL as geometries argument, which will be loaded with the load_url process.

+
+
+

Changed in version 0.36.0: Support for passing a backend-side path as geometries argument was removed +(also see Legacy read_vector usage). +Instead, it’s possible to provide a client-side path to a GeoJSON file +(which will be loaded client-side to get the geometries as GeoJSON construct).

+
+
+

See also

+

openeo.org documentation on process “mask_polygon”.

+
+
+ +
+
+max_time()[source]
+

Finds the maximum value of a time series for all bands of the input dataset.

+
+
Return type:
+

DataCube

+
+
Returns:
+

a DataCube instance

+
+
+
+

See also

+

openeo.org documentation on process “max”.

+
+
+ +
+
+mean_time()[source]
+

Finds the mean value of a time series for all bands of the input dataset.

+
+
Return type:
+

DataCube

+
+
Returns:
+

a DataCube instance

+
+
+
+

See also

+

openeo.org documentation on process “mean”.

+
+
+ +
+
+median_time()[source]
+

Finds the median value of a time series for all bands of the input dataset.

+
+
Return type:
+

DataCube

+
+
Returns:
+

a DataCube instance

+
+
+
+

See also

+

openeo.org documentation on process “median”.

+
+
+ +
+
+merge(other, overlap_resolver=None, context=None)
+
+
Return type:
+

DataCube

+
+
+
+

Deprecated since version 0.4.6: Usage of this legacy method is deprecated. Use +merge_cubes() instead.

+
+
+ +
+
+merge_cubes(other, overlap_resolver=None, context=None)[source]
+

Merging two data cubes

+

The data cubes have to be compatible. A merge operation without overlap should be reversible with (a set of) filter operations for each of the two cubes. The process performs the join on overlapping dimensions, with the same name and type. +An overlapping dimension has the same name, type, reference system and resolution in both dimensions, but can have different labels. One of the dimensions can have different labels, for all other dimensions the labels must be equal. If data overlaps, the parameter overlap_resolver must be specified to resolve the overlap.

+

Examples for merging two data cubes:

+
    +
  1. Data cubes with the dimensions x, y, t and bands have the same dimension labels in x,y and t, but the labels for the dimension bands are B1 and B2 for the first cube and B3 and B4 for the second. An overlap resolver is not needed. The merged data cube has the dimensions x, y, t and bands and the dimension bands has four dimension labels: B1, B2, B3, B4.

  2. +
  3. Data cubes with the dimensions x, y, t and bands have the same dimension labels in x,y and t, but the labels for the dimension bands are B1 and B2 for the first data cube and B2 and B3 for the second. An overlap resolver is required to resolve overlap in band B2. The merged data cube has the dimensions x, y, t and bands and the dimension bands has three dimension labels: B1, B2, B3.

  4. +
  5. +
    Data cubes with the dimensions x, y and t have the same dimension labels in x,y and t. There are two options:
      +
    • Keep the overlapping values separately in the merged data cube: An overlap resolver is not needed, but for each data cube you need to add a new dimension using add_dimension. The new dimensions must be equal, except that the labels for the new dimensions must differ by name. The merged data cube has the same dimensions and labels as the original data cubes, plus the dimension added with add_dimension, which has the two dimension labels after the merge.

    • +
    • Combine the overlapping values into a single value: An overlap resolver is required to resolve the overlap for all pixels. The merged data cube has the same dimensions and labels as the original data cubes, but all pixel values have been processed by the overlap resolver.

    • +
    +
    +
    +
  6. +
  7. Merging a data cube with dimensions x, y, t with another cube with dimensions x, y will join on the x, y dimension, so the lower dimension cube is merged with each time step in the higher dimensional cube. This can for instance be used to apply a digital elevation model to a spatiotemporal data cube.

  8. +
+
+
Parameters:
+
    +
  • other (DataCube) – The data cube to merge with.

  • +
  • overlap_resolver (Union[str, PGNode, Callable]) – A reduction operator that resolves the conflict if the data overlaps. The reducer must return a value of the same data type as the input values are. The reduction operator may be a single process such as multiply or consist of multiple sub-processes. null (the default) can be specified if no overlap resolver is required.

  • +
  • context (Optional[dict]) – Additional data to be passed to the process.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

The merged data cube.

+
+
+
+

See also

+

openeo.org documentation on process “merge_cubes”.

+
+
+ +
+
+min_time()[source]
+

Finds the minimum value of a time series for all bands of the input dataset.

+
+
Return type:
+

DataCube

+
+
Returns:
+

a DataCube instance

+
+
+
+

See also

+

openeo.org documentation on process “min”.

+
+
+ +
+
+multiply(other, reverse=False)[source]
+
+
Return type:
+

DataCube

+
+
+
+

See also

+

openeo.org documentation on process “multiply”.

+
+
+ +
+
+ndvi(nir=None, red=None, target_band=None)[source]
+

Normalized Difference Vegetation Index (NDVI)

+
+
Parameters:
+
    +
  • nir (str) – (optional) name of NIR band

  • +
  • red (str) – (optional) name of red band

  • +
  • target_band (str) – (optional) name of the newly created band

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

a DataCube instance

+
+
+
+

See also

+

openeo.org documentation on process “ndvi”.

+
+
+ +
+
+normalized_difference(other)[source]
+
+
Return type:
+

DataCube

+
+
+
+

See also

+

openeo.org documentation on process “normalized_difference”.

+
+
+ +
+
+polygonal_histogram_timeseries(polygon)[source]
+

Extract a histogram time series for the given (multi)polygon. Its points are +expected to be in the EPSG:4326 coordinate +reference system.

+
+
Parameters:
+

polygon (Union[Polygon, MultiPolygon, str]) – The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file

+
+
Return type:
+

VectorCube

+
+
+
+

Deprecated since version 0.10.0: Use aggregate_spatial() with reducer 'histogram'.

+
+
+ +
+
+polygonal_mean_timeseries(polygon)[source]
+

Extract a mean time series for the given (multi)polygon. Its points are +expected to be in the EPSG:4326 coordinate +reference system.

+
+
Parameters:
+

polygon (Union[Polygon, MultiPolygon, str]) – The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file

+
+
Return type:
+

VectorCube

+
+
+
+

Deprecated since version 0.10.0: Use aggregate_spatial() with reducer 'mean'.

+
+
+ +
+
+polygonal_median_timeseries(polygon)[source]
+

Extract a median time series for the given (multi)polygon. Its points are +expected to be in the EPSG:4326 coordinate +reference system.

+
+
Parameters:
+

polygon (Union[Polygon, MultiPolygon, str]) – The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file

+
+
Return type:
+

VectorCube

+
+
+
+

Deprecated since version 0.10.0: Use aggregate_spatial() with reducer 'median'.

+
+
+ +
+
+polygonal_standarddeviation_timeseries(polygon)[source]
+

Extract a time series of standard deviations for the given (multi)polygon. Its points are +expected to be in the EPSG:4326 coordinate +reference system.

+
+
Parameters:
+

polygon (Union[Polygon, MultiPolygon, str]) – The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file

+
+
Return type:
+

VectorCube

+
+
+
+

Deprecated since version 0.10.0: Use aggregate_spatial() with reducer 'sd'.

+
+
+ +
+
+power(p)[source]
+
+

See also

+

openeo.org documentation on process “power”.

+
+
+ +
+
+predict_curve(parameters, function, dimension, labels=None)[source]
+

Predict values using a model function and pre-computed parameters.

+
+

Warning

+

experimental process: not generally supported, API subject to change. +https://github.com/Open-EO/openeo-processes/pull/240

+
+
+
Parameters:
+
+
+
+
+

See also

+

openeo.org documentation on process “predict_curve”.

+
+
+ +
+
+predict_random_forest(model, dimension='bands')[source]
+

Apply reduce_dimension process with a predict_random_forest reducer.

+
+
Parameters:
+
    +
  • model (Union[str, BatchJob, MlModel]) –

    a reference to a trained model, one of

    +
      +
    • a MlModel instance (e.g. loaded from Connection.load_ml_model())

    • +
    • a BatchJob instance of a batch job that saved a single random forest model

    • +
    • a job id (str) of a batch job that saved a single random forest model

    • +
    • a STAC item URL (str) to load the random forest from. +(The STAC Item must implement the ml-model extension.)

    • +
    +

  • +
  • dimension (str) – dimension along which to apply the reduce_dimension process.

  • +
+
+
+
+

Added in version 0.10.0.

+
+
+

See also

+

openeo.org documentation on process “predict_random_forest”.

+
+
+ +
+
+preview(center=None, zoom=None)[source]
+

Creates a service with the process graph and displays a map widget. Only supports XYZ.

+
+
Parameters:
+
    +
  • center (Optional[Iterable]) – (optional) Map center. Default is (0,0).

  • +
  • zoom (Optional[int]) – (optional) Zoom level of the map. Default is 1.

  • +
+
+
Returns:
+

ipyleaflet Map object and the displayed Service

+
+
+
+

Warning

+

experimental feature, subject to change.

+
+
+

Added in version 0.19.0.

+
+
+ +
+
+print_json(*, file=None, indent=2, separators=None, end='\\n')
+

Print interoperable JSON representation of the process graph.

+

See DataCube.to_json() to get the JSON representation as a string +and Export a process graph for more usage information.

+

Also see json.dumps docs for more information on the JSON formatting options.

+
+
Parameters:
+
    +
  • file – file-like object (stream) to print to (current sys.stdout by default). +Or a path (string or pathlib.Path) to a file to write to.

  • +
  • indent (Optional[int]) – JSON indentation level.

  • +
  • separators (Optional[Tuple[str, str]]) – (optional) tuple of item/key separators.

  • +
  • end (str) – additional string to be printed at the end (newline by default).

  • +
+
+
+
+

Added in version 0.12.0.

+
+
+

Added in version 0.23.0: added the end argument.

+
+
+ +
+
+process(process_id, arguments=None, metadata=None, namespace=None, **kwargs)[source]
+

Generic helper to create a new DataCube by applying a process.

+
+
Parameters:
+
    +
  • process_id (str) – process id of the process.

  • +
  • arguments (Optional[dict]) – argument dictionary for the process.

  • +
  • metadata (Optional[CollectionMetadata]) – optional: metadata to override original cube metadata (e.g. when reducing dimensions)

  • +
  • namespace (Optional[str]) – optional: process namespace

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

new DataCube instance

+
+
+
+ +
+
+process_with_node(pg, metadata=None)[source]
+

Generic helper to create a new DataCube by applying a process (given as process graph node)

+
+
Parameters:
+
    +
  • pg (PGNode) – process graph node (containing process id and arguments)

  • +
  • metadata (Optional[CollectionMetadata]) – optional: metadata to override original cube metadata (e.g. when reducing dimensions)

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

new DataCube instance

+
+
+
+ +
+
+raster_to_vector()[source]
+

Converts this raster data cube into a VectorCube. +The bounding polygon of homogeneous areas of pixels is constructed.

+
+

Warning

+

experimental process: not generally supported, API subject to change.

+
+
+
Return type:
+

VectorCube

+
+
Returns:
+

a VectorCube

+
+
+
+ +
+
+reduce_bands(reducer)[source]
+

Shortcut for reduce_dimension() along the band dimension

+
+
Parameters:
+

reducer (Union[str, PGNode, Callable, UDF]) – “child callback” function, see Processes with child “callbacks”

+
+
Return type:
+

DataCube

+
+
+
+ +
+
+reduce_bands_udf(code, runtime=None, version=None)[source]
+

Use reduce_dimension process with given UDF along band/spectral dimension. +Return type: DataCube

+
+

Deprecated since version 0.13.0: Use reduce_bands() with UDF as reducer.

+
+
+ +
+
+reduce_dimension(dimension, reducer, context=None, process_id='reduce_dimension', band_math_mode=False)[source]
+

Add a reduce process with given reducer callback along given dimension

+
+
Parameters:
+
    +
  • dimension (str) – the label of the dimension to reduce

  • +
  • reducer (Union[str, Callable, UDF, PGNode]) –

    the “child callback”: +the name of a single openEO process, +or a callback function as discussed in Processes with child “callbacks”, +or a UDF instance.

    +

    The callback should correspond to a process that +receives an array of numerical values +and returns a single numerical value. +For example:

    + +

  • +
  • context (Optional[dict]) – Additional data to be passed to the process.

  • +
+
+
Return type:
+

DataCube

+
+
+
+

See also

+

openeo.org documentation on process “reduce_dimension”.

+
+
+ +
+
+reduce_spatial(reducer, context=None)[source]
+

Add a reduce process with given reducer callback along the spatial dimensions

+
+
Parameters:
+
    +
  • reducer (Union[str, Callable, UDF, PGNode]) –

    the “child callback”: +the name of a single openEO process, +or a callback function as discussed in Processes with child “callbacks”, +or a UDF instance.

    +

    The callback should correspond to a process that +receives an array of numerical values +and returns a single numerical value. +For example:

    + +

  • +
  • context (Optional[dict]) – Additional data to be passed to the process.

  • +
+
+
Return type:
+

DataCube

+
+
+
+

See also

+

openeo.org documentation on process “reduce_spatial”.

+
+
+ +
+
+reduce_temporal(reducer)[source]
+

Shortcut for reduce_dimension() along the temporal dimension

+
+
Parameters:
+

reducer (Union[str, PGNode, Callable, UDF]) – “child callback” function, see Processes with child “callbacks”

+
+
Return type:
+

DataCube

+
+
+
+ +
+
+reduce_temporal_simple(reducer)
+
+
Return type:
+

DataCube

+
+
+
+

Deprecated since version 0.13.0: Usage of this legacy method is deprecated. Use +reduce_temporal() instead.

+
+
+ +
+
+reduce_temporal_udf(code, runtime='Python', version='latest')[source]
+

Apply reduce (reduce_dimension) process with given UDF along temporal dimension.

+
+
Parameters:
+
    +
  • code (str) – The UDF code, compatible with the given runtime and version

  • +
  • runtime – The UDF runtime

  • +
  • version – The UDF runtime version

  • +
+
+
+
+

Deprecated since version 0.13.0: Use reduce_temporal() with UDF as reducer

+
+
+ +
+
+reduce_tiles_over_time(code, runtime='Python', version='latest')
+
+

Deprecated since version 0.1.1: Usage of this legacy method is deprecated. Use +reduce_temporal_udf() instead.

+
+
+ +
+
+rename_dimension(source, target)[source]
+

Renames a dimension in the data cube while preserving all other properties.

+
+
Parameters:
+
    +
  • source (str) – The current name of the dimension. Fails with a DimensionNotAvailable error if the specified dimension does not exist.

  • +
  • target (str) – A new name for the dimension. Fails with a DimensionExists error if a dimension with the specified name exists.

  • +
+
+
Returns:
+

A new datacube with the dimension renamed.

+
+
+
+

See also

+

openeo.org documentation on process “rename_dimension”.

+
+
+ +
+
+rename_labels(dimension, target, source=None)[source]
+

Renames the labels of the specified dimension in the data cube from source to target.

+
+
Parameters:
+
    +
  • dimension (str) – Dimension name

  • +
  • target (list) – The new names for the labels.

  • +
  • source (list) – The names of the labels as they are currently in the data cube.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

A DataCube instance

+
+
+
+

See also

+

openeo.org documentation on process “rename_labels”.

+
+
+ +
+
+resample_cube_spatial(target, method='near')[source]
+

Resamples the spatial dimensions (x,y) from a source data cube to align with the corresponding +dimensions of the given target data cube. +Returns a new data cube with the resampled dimensions.

+

To resample a data cube to a specific resolution or projection regardless of an existing target +data cube, refer to resample_spatial().

+
+
Parameters:
+
    +
  • target (DataCube) – A data cube that describes the spatial target resolution.

  • +
  • method (str) – Resampling method to use.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

+
+
+
+ +
+
+resample_cube_temporal(target, dimension=None, valid_within=None)[source]
+

Resamples one or more given temporal dimensions from a source data cube to align with the corresponding +dimensions of the given target data cube using the nearest neighbor method. +Returns a new data cube with the resampled dimensions.

+

By default, this process simply takes the nearest neighbor independent of the value (including values such as +no-data / null). Depending on the data cubes this may lead to values being assigned to two target timestamps. +To only consider valid values in a specific range around the target timestamps, use the parameter valid_within.

+

The rare case of ties is resolved by choosing the earlier timestamps.

+
+
Parameters:
+
    +
  • target (DataCube) – A data cube that describes the temporal target resolution.

  • +
  • dimension (Optional[str]) – The name of the temporal dimension to resample.

  • +
  • valid_within (Optional[int])

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

+
+
+
+

Added in version 0.10.0.

+
+
+

See also

+

openeo.org documentation on process “resample_cube_temporal”.

+
+
+ +
+
+resample_spatial(resolution, projection=None, method='near', align='upper-left')[source]
+
+
Return type:
+

DataCube

+
+
+
+

See also

+

openeo.org documentation on process “resample_spatial”.

+
+
+ +
+
+resolution_merge(high_resolution_bands, low_resolution_bands, method=None)[source]
+

Resolution merging algorithms try to improve the spatial resolution of lower resolution bands +(e.g. Sentinel-2 20M) based on higher resolution bands (e.g. Sentinel-2 10M).

+

External references:

+

Pansharpening explained

+

Example publication: ‘Improving the Spatial Resolution of Land Surface Phenology by Fusing Medium- and +Coarse-Resolution Inputs’

+
+

Warning

+

experimental process: not generally supported, API subject to change.

+
+
+
Parameters:
+
    +
  • high_resolution_bands (List[str]) – A list of band names to use as ‘high-resolution’ band. Either the unique band name (metadata field name in bands) or one of the common band names (metadata field common_name in bands). If unique band name and common name conflict, the unique band name has higher priority. The order of the specified array defines the order of the bands in the data cube. If multiple bands match a common name, all matched bands are included in the original order. These bands will remain unmodified.

  • +
  • low_resolution_bands (List[str]) – A list of band names for which the spatial resolution should be increased. Either the unique band name (metadata field name in bands) or one of the common band names (metadata field common_name in bands). If unique band name and common name conflict, the unique band name has higher priority. The order of the specified array defines the order of the bands in the data cube. If multiple bands match a common name, all matched bands are included in the original order. These bands will be modified by the process.

  • +
  • method (str) – The method to use. The supported algorithms can vary between back-ends. Set to null (the default) to allow the back-end to choose, which will improve portability, but reduce reproducibility.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

A datacube with the same bands and metadata as the input, but algorithmically increased spatial resolution for the selected bands.

+
+
+
+

See also

+

openeo.org documentation on process “resolution_merge”.

+
+
+ +
+
+result_node()
+

Get the current result node (PGNode) of the process graph. +Return type: PGNode

+
+

Added in version 0.10.1.

+
+
+ +
+
+sar_backscatter(coefficient='gamma0-terrain', elevation_model=None, mask=False, contributing_area=False, local_incidence_angle=False, ellipsoid_incidence_angle=False, noise_removal=True, options=None)[source]
+

Computes backscatter from SAR input.

+

Note that backscatter computation may require instrument specific metadata that is tightly coupled to the +original SAR products. As a result, this process may only work in combination with loading data from +specific collections, not with general data cubes.

+
+
Parameters:
+
    +
  • coefficient (Optional[str]) –

    Select the radiometric correction coefficient. +The following options are available:

    +
      +
    • ”beta0”: radar brightness

    • +
    • ”sigma0-ellipsoid”: ground area computed with ellipsoid earth model

    • +
    • ”sigma0-terrain”: ground area computed with terrain earth model

    • +
    • ”gamma0-ellipsoid”: ground area computed with ellipsoid earth model in sensor line of sight

    • +
    • ”gamma0-terrain”: ground area computed with terrain earth model in sensor line of sight (default)

    • +
    • None: non-normalized backscatter

    • +
    +

  • +
  • elevation_model (Optional[str]) – The digital elevation model to use. Set to None (the default) to allow +the back-end to choose, which will improve portability, but reduce reproducibility.

  • +
  • mask (bool) – If set to true, a data mask is added to the bands with the name mask. +It indicates which values are valid (1), invalid (0) or contain no-data (null).

  • +
  • contributing_area (bool) – If set to true, a DEM-based local contributing area band named contributing_area +is added. The values are given in square meters.

  • +
  • local_incidence_angle (bool) – If set to true, a DEM-based local incidence angle band named +local_incidence_angle is added. The values are given in degrees.

  • +
  • ellipsoid_incidence_angle (bool) – If set to true, an ellipsoidal incidence angle band named +ellipsoid_incidence_angle is added. The values are given in degrees.

  • +
  • noise_removal (bool) – If set to false, no noise removal is applied. Defaults to true, which removes noise.

  • +
  • options (Optional[dict]) – dictionary with additional (backend-specific) options.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

+
+
+
+

Added in version 0.4.9.

+
+
+

Changed in version 0.4.10: replace orthorectify and rtc arguments with coefficient.

+
+
+

See also

+

openeo.org documentation on process “sar_backscatter”.

+
+
+ +
+
+save_result(format='GTiff', options=None)[source]
+
+
Return type:
+

DataCube

+
+
+
+

See also

+

openeo.org documentation on process “save_result”.

+
+
+ +
+
+save_user_defined_process(user_defined_process_id, public=False, summary=None, description=None, returns=None, categories=None, examples=None, links=None)[source]
+

Saves this process graph in the backend as a user-defined process for the authenticated user.

+
+
Parameters:
+
    +
  • user_defined_process_id (str) – unique identifier for the process

  • +
  • public (bool) – visible to other users?

  • +
  • summary (Optional[str]) – A short summary of what the process does.

  • +
  • description (Optional[str]) – Detailed description to explain the entity. CommonMark 0.29 syntax MAY be used for rich text representation.

  • +
  • returns (Optional[dict]) – Description and schema of the return value.

  • +
  • categories (Optional[List[str]]) – A list of categories.

  • +
  • examples (Optional[List[dict]]) – A list of examples.

  • +
  • links (Optional[List[dict]]) – A list of links.

  • +
+
+
Return type:
+

RESTUserDefinedProcess

+
+
Returns:
+

a RESTUserDefinedProcess instance

+
+
+
+ +
+
+send_job(out_format=None, *, title=None, description=None, plan=None, budget=None, additional=None, job_options=None, validate=None, auto_add_save_result=True, **format_options)
+
+
Return type:
+

BatchJob

+
+
+
+

Deprecated since version 0.10.0: Usage of this legacy method is deprecated. Use +create_job() instead.

+
+
+ +
+
+subtract(other, reverse=False)[source]
+
+
Return type:
+

DataCube

+
+
+
+

See also

+

openeo.org documentation on process “subtract”.

+
+
+ +
+
+to_json(*, indent=2, separators=None)
+

Get interoperable JSON representation of the process graph.

+

See DataCube.print_json() to directly print the JSON representation +and Export a process graph for more usage information.

+

Also see json.dumps docs for more information on the JSON formatting options.

+
+
Parameters:
+
    +
  • indent (Optional[int]) – JSON indentation level.

  • +
  • separators (Optional[Tuple[str, str]]) – (optional) tuple of item/key separators.

  • +
+
+
Return type:
+

str

+
+
Returns:
+

JSON string

+
+
+
+ +
+
+unflatten_dimension(dimension, target_dimensions, label_separator=None)[source]
+

Splits a single dimension into multiple dimensions by systematically extracting values and splitting +the dimension labels by the given label_separator. +This process is the opposite of the process flatten_dimensions() but executing both processes +subsequently doesn’t necessarily create a data cube that is equal to the original data cube.

+
+
Parameters:
+
    +
  • dimension (str) – The name of the dimension to split.

  • +
  • target_dimensions (List[str]) – The names of the target dimensions.

  • +
  • label_separator (Optional[str]) – The string that will be used as a separator to split the dimension labels.

  • +
+
+
Returns:
+

A data cube with the new shape.

+
+
+
+

Warning

+

experimental process: not generally supported, API subject to change.

+
+
+

Added in version 0.10.0.

+
+
+

See also

+

openeo.org documentation on process “unflatten_dimension”.

+
+
+ +
+
+validate()[source]
+

Validate a process graph without executing it.

+
+
Return type:
+

List[dict]

+
+
Returns:
+

list of errors (dictionaries with “code” and “message” fields)

+
+
+
+ +
+ +
+
+class openeo.rest._datacube.UDF(code, runtime=None, data=None, version=None, context=None, _source=None)[source]
+

Helper class to load UDF code (e.g. from file) and embed them as “callback” or child process in a process graph.

+

Usage example:

+
udf = UDF.from_file("my-udf-code.py")
+cube = cube.apply(process=udf)
+
+
+
+

Changed in version 0.13.0: Added auto-detection of runtime. +Specifying the data argument is not necessary anymore, and actually deprecated. +Added from_file() to simplify loading UDF code from a file. +See openeo.UDF API and usage changes in version 0.13.0 for more background about the changes.

+
+
+
+classmethod from_file(path, runtime=None, version=None, context=None)[source]
+

Load a UDF from a local file.

+
+

See also

+

from_url() for loading from a URL.

+
+
+
Parameters:
+
    +
  • path (Union[str, Path]) – path to the local file with UDF source code

  • +
  • runtime (Optional[str]) – optional UDF runtime identifier, will be auto-detected from source code if omitted.

  • +
  • version (Optional[str]) – optional UDF runtime version string

  • +
  • context (Optional[dict]) – optional additional UDF context data

  • +
+
+
Return type:
+

UDF

+
+
+
+ +
+
+classmethod from_url(url, runtime=None, version=None, context=None)[source]
+

Load a UDF from a URL.

+
+

See also

+

from_file() for loading from a local file.

+
+
+
Parameters:
+
    +
  • url (str) – URL path to load the UDF source code from

  • +
  • runtime (Optional[str]) – optional UDF runtime identifier, will be auto-detected from source code if omitted.

  • +
  • version (Optional[str]) – optional UDF runtime version string

  • +
  • context (Optional[dict]) – optional additional UDF context data

  • +
+
+
Return type:
+

UDF

+
+
+
+ +
+
+get_run_udf_callback(connection=None, data_parameter='data')[source]
+

For internal use: construct run_udf node to be used as callback in apply, reduce_dimension, …

+
+
Return type:
+

PGNode

+
+
+
+ +
+ +
+
+

openeo.rest.vectorcube

+
+
+class openeo.rest.vectorcube.VectorCube(graph, connection, metadata=None)[source]
+

A Vector Cube, or ‘Vector Collection’ is a data structure containing ‘Features’: +https://www.w3.org/TR/sdw-bp/#dfn-feature

+

The features in this cube are restricted to have a geometry. Geometries can be points, lines, polygons etcetera. +A geometry is specified in a ‘coordinate reference system’. https://www.w3.org/TR/sdw-bp/#dfn-coordinate-reference-system-(crs)

+
+
+apply_dimension(process, dimension, target_dimension=None, context=None)[source]
+

Applies a process to all values along a dimension of a data cube. +For example, if the temporal dimension is specified the process will work on the values of a time series.

+

The process to apply is specified by providing a callback function in the process argument.

+
+
Parameters:
+
    +
  • process (Union[str, Callable, UDF, PGNode]) –

    the “child callback”: +the name of a single process, +or a callback function as discussed in Processes with child “callbacks”, +or a UDF instance.

    +

    The callback should correspond to a process that +receives an array of numerical values +and returns an array of numerical values. +For example:

    + +

  • +
  • dimension (str) – The name of the source dimension to apply the process on. Fails with a DimensionNotAvailable error if the specified dimension does not exist.

  • +
  • target_dimension (Optional[str]) – The name of the target dimension or null (the default) to use the source dimension +specified in the parameter dimension. By specifying a target dimension, the source dimension is removed. +The target dimension with the specified name and the type other (see add_dimension) is created, if it doesn’t exist yet.

  • +
  • context (Optional[dict]) – Additional data to be passed to the process.

  • +
+
+
Return type:
+

VectorCube

+
+
Returns:
+

A datacube with the UDF applied to the given dimension.

+
+
Raises:
+

DimensionNotAvailable

+
+
+
+

Added in version 0.22.0.

+
+
+

See also

+

openeo.org documentation on process “apply_dimension”.

+
+
+ +
+
+create_job(out_format=None, *, title=None, description=None, plan=None, budget=None, additional=None, job_options=None, validate=None, auto_add_save_result=True, **format_options)[source]
+

Sends a job to the backend and returns a BatchJob instance.

+
+
Parameters:
+
    +
  • out_format (Optional[str]) – String Format of the job result.

  • +
  • title (Optional[str]) – job title

  • +
  • description (Optional[str]) – job description

  • +
  • plan (Optional[str]) – The billing plan to process and charge the job with

  • +
  • budget (Optional[float]) – Maximum budget to be spent on executing the job. +Note that some backends do not honor this limit.

  • +
  • additional (Optional[dict]) – additional (top-level) properties to set in the request body

  • +
  • job_options (Optional[dict]) – dictionary of job options to pass to the backend +(under top-level property “job_options”)

  • +
  • format_options – String Parameters for the job result format

  • +
  • validate (Optional[bool]) – Optional toggle to enable/prevent validation of the process graphs before execution +(overruling the connection’s auto_validate setting).

  • +
  • auto_add_save_result (bool) – Automatically add a save_result node to the process graph if there is none yet.

  • +
+
+
Return type:
+

BatchJob

+
+
Returns:
+

Created job.

+
+
+
+

Changed in version 0.32.0: Added auto_add_save_result option

+
+
+ +
+
+download(outputfile=None, format=None, options=None, *, validate=None, auto_add_save_result=True)[source]
+

Execute synchronously and download the vector cube.

+

The result will be stored to the output path, when specified. +If no output path (or None) is given, the raw download content will be returned as bytes object.

+
+
Parameters:
+
    +
  • outputfile (Union[str, Path, None]) – (optional) output file to store the result to

  • +
  • format (Optional[str]) – (optional) output format to use.

  • +
  • options (Optional[dict]) – (optional) additional output format options.

  • +
  • validate (Optional[bool]) – Optional toggle to enable/prevent validation of the process graphs before execution +(overruling the connection’s auto_validate setting).

  • +
  • auto_add_save_result (bool) – Automatically add a save_result node to the process graph if there is none yet.

  • +
+
+
Return type:
+

Optional[bytes]

+
+
+
+

Changed in version 0.21.0: When not specified explicitly, output format is guessed from output file extension.

+
+
+

Changed in version 0.32.0: Added auto_add_save_result option

+
+
+ +
+
+execute(*, validate=None)[source]
+

Executes the process graph.

+
+
Return type:
+

dict

+
+
+
+ +
+
+execute_batch(outputfile=None, out_format=None, *, title=None, description=None, plan=None, budget=None, print=<built-in function print>, max_poll_interval=60, connection_retry_interval=30, additional=None, job_options=None, validate=None, auto_add_save_result=True, **format_options)[source]
+

Evaluate the process graph by creating a batch job, and retrieving the results when it is finished. +This method is mostly recommended if the batch job is expected to run in a reasonable amount of time.

+

For very long running jobs, you probably do not want to keep the client running.

+
+
Parameters:
+
    +
  • additional (Optional[dict]) – additional (top-level) properties to set in the request body

  • +
  • job_options (Optional[dict]) – dictionary of job options to pass to the backend +(under top-level property “job_options”)

  • +
  • outputfile (Union[str, Path, None]) – The path of a file to which a result can be written

  • +
  • out_format (Optional[str]) – (optional) output format to use.

  • +
  • format_options – (optional) additional output format options

  • +
  • validate (Optional[bool]) – Optional toggle to enable/prevent validation of the process graphs before execution +(overruling the connection’s auto_validate setting).

  • +
  • auto_add_save_result (bool) – Automatically add a save_result node to the process graph if there is none yet.

  • +
+
+
Return type:
+

BatchJob

+
+
+
+

Changed in version 0.21.0: When not specified explicitly, output format is guessed from output file extension.

+
+
+

Changed in version 0.32.0: Added auto_add_save_result option

+
+
+

Added in version 0.36.0: Added argument additional.

+
+
+ +
+
+filter_bands(bands)[source]
+
+
Return type:
+

VectorCube

+
+
+
+

Added in version 0.22.0.

+
+
+

See also

+

openeo.org documentation on process “filter_bands”.

+
+
+ +
+
+filter_bbox(*, west=None, south=None, east=None, north=None, extent=None, crs=None)[source]
+
+
Return type:
+

VectorCube

+
+
+
+

Added in version 0.22.0.

+
+
+

See also

+

openeo.org documentation on process “filter_bbox”.

+
+
+ +
+
+filter_labels(condition, dimension, context=None)[source]
+

Filters the dimension labels in the data cube for the given dimension. +Only the dimension labels that match the specified condition are preserved, +all other labels with their corresponding data get removed.

+
+
Parameters:
+
    +
  • condition (Union[PGNode, Callable]) – the “child callback” which will be given a single label value (number or string) +and returns a boolean expressing if the label should be preserved. +Also see Processes with child “callbacks”.

  • +
  • dimension (str) – The name of the dimension to filter on.

  • +
+
+
Return type:
+

VectorCube

+
+
+
+

Added in version 0.22.0.

+
+
+

See also

+

openeo.org documentation on process “filter_labels”.

+
+
+ +
+
+filter_vector(geometries, relation='intersects')[source]
+
+
Return type:
+

VectorCube

+
+
+
+

Added in version 0.22.0.

+
+
+

See also

+

openeo.org documentation on process “filter_vector”.

+
+
+ +
+
+fit_class_random_forest(target, max_variables=None, num_trees=100, seed=None)[source]
+

Executes the fit of a random forest classification based on the user input of target and predictors. +The Random Forest classification model is based on the approach by Breiman (2001).

+
+

Warning

+

EXPERIMENTAL: not generally supported, API subject to change.

+
+
+
Parameters:
+
    +
  • target (dict) – The training sites for the classification model as a vector data cube. This is associated with the target +variable for the Random Forest model. The geometry has to be associated with a value to predict (e.g. fractional +forest canopy cover).

  • +
  • max_variables (Optional[int]) – Specifies how many split variables will be used at a node. Default value is null, which corresponds to the +number of predictors divided by 3.

  • +
  • num_trees (int) – The number of trees built within the Random Forest classification.

  • +
  • seed (Optional[int]) – A randomization seed to use for the random sampling in training.

  • +
+
+
Return type:
+

MlModel

+
+
+
+

Added in version 0.16.0: Originally added in version 0.10.0 as DataCube method, +but moved to VectorCube in version 0.16.0.

+
+
+

See also

+

openeo.org documentation on process “fit_class_random_forest”.

+
+
+ +
+
+fit_regr_random_forest(target, max_variables=None, num_trees=100, seed=None)[source]
+

Executes the fit of a random forest regression based on training data. +The Random Forest regression model is based on the approach by Breiman (2001).

+
+

Warning

+

EXPERIMENTAL: not generally supported, API subject to change.

+
+
+
Parameters:
+
    +
  • target (dict) – The training sites for the regression model as a vector data cube. +This is associated with the target variable for the Random Forest model. +The geometry has to be associated with a value to predict (e.g. fractional forest canopy cover).

  • +
  • max_variables (Optional[int]) – Specifies how many split variables will be used at a node. Default value is null, which corresponds to the +number of predictors divided by 3.

  • +
  • num_trees (int) – The number of trees built within the Random Forest regression.

  • +
  • seed (Optional[int]) – A randomization seed to use for the random sampling in training.

  • +
+
+
Return type:
+

MlModel

+
+
+
+

Added in version 0.16.0: Originally added in version 0.10.0 as DataCube method, +but moved to VectorCube in version 0.16.0.

+
+
+

See also

+

openeo.org documentation on process “fit_regr_random_forest”.

+
+
+ +
+
+flat_graph()
+

Get the process graph in internal flat dict representation. +Return type: Dict[str, dict]

+
+

Warning

+

This method is mainly intended for internal use. +It is not recommended for general use and is subject to change.

+

Instead, it is recommended to use +to_json() or print_json() +to obtain a standardized, interoperable JSON representation of the process graph. +See Export a process graph for more information.

+
+
+ +
+
+classmethod load_geojson(connection, data, properties=None)[source]
+

Converts GeoJSON data as defined by RFC 7946 into a vector data cube.

+
+
Parameters:
+
    +
  • connection (Connection) – the connection to use to connect with the openEO back-end.

  • +
  • data (Union[dict, str, Path, BaseGeometry, Parameter]) –

    the geometry to load. One of:

    +
      +
    • GeoJSON-style data structure: e.g. a dictionary with "type": "Polygon" and "coordinates" fields

    • +
    • a path to a local GeoJSON file

    • +
    • a GeoJSON string

    • +
    • a shapely geometry object

    • +
    +

  • +
  • properties (Optional[List[str]]) – A list of properties from the GeoJSON file to construct an additional dimension from.

  • +
+
+
Return type:
+

VectorCube

+
+
Returns:
+

new VectorCube instance

+
+
+
+

Warning

+

EXPERIMENTAL: this process is experimental with the potential for major things to change.

+
+
+

Added in version 0.22.0.

+
+
+

See also

+

openeo.org documentation on process “load_geojson”.

+
+
+ +
+
+classmethod load_url(connection, url, format, options=None)[source]
+

Loads a file from a URL

+
+
Parameters:
+
    +
  • connection (Connection) – the connection to use to connect with the openEO back-end.

  • +
  • url (str) – The URL to read from. Authentication details such as API keys or tokens may need to be included in the URL.

  • +
  • format (str) – The file format to use when loading the data.

  • +
  • options (Optional[dict]) – The file format parameters to use when reading the data. +Must correspond to the parameters that the server reports as supported parameters for the chosen format

  • +
+
+
Return type:
+

VectorCube

+
+
Returns:
+

new VectorCube instance

+
+
+
+

Warning

+

EXPERIMENTAL: this process is experimental with the potential for major things to change.

+
+
+

Added in version 0.22.0.

+
+
+

See also

+

openeo.org documentation on process “load_url”.

+
+
+ +
+
+print_json(*, file=None, indent=2, separators=None, end='\\n')
+

Print interoperable JSON representation of the process graph.

+

See DataCube.to_json() to get the JSON representation as a string +and Export a process graph for more usage information.

+

Also see json.dumps docs for more information on the JSON formatting options.

+
+
Parameters:
+
    +
  • file – file-like object (stream) to print to (current sys.stdout by default). +Or a path (string or pathlib.Path) to a file to write to.

  • +
  • indent (Optional[int]) – JSON indentation level.

  • +
  • separators (Optional[Tuple[str, str]]) – (optional) tuple of item/key separators.

  • +
  • end (str) – additional string to be printed at the end (newline by default).

  • +
+
+
+
+

Added in version 0.12.0.

+
+
+

Added in version 0.23.0: added the end argument.

+
+
+ +
+
+process(process_id, arguments=None, metadata=None, namespace=None, **kwargs)[source]
+

Generic helper to create a new VectorCube by applying a process.

+
+
Parameters:
+
    +
  • process_id (str) – process id of the process.

  • +
  • arguments – argument dictionary for the process.

  • +
+
+
Return type:
+

VectorCube

+
+
Returns:
+

new VectorCube instance

+
+
+
+ +
+
+result_node()
+

Get the current result node (PGNode) of the process graph. +Return type: PGNode

+
+

Added in version 0.10.1.

+
+
+ +
+
+run_udf(udf, runtime=None, version=None, context=None)[source]
+

Run a UDF on the vector cube.

+

It is recommended to provide the UDF just as a UDF instance +(the other arguments could be used to override UDF parameters if necessary).

+
+
Parameters:
+
    +
  • udf (Union[str, UDF]) – UDF code as a string or UDF instance

  • +
  • runtime (Optional[str]) – UDF runtime

  • +
  • version (Optional[str]) – UDF version

  • +
  • context (Optional[dict]) – UDF context

  • +
+
+
Return type:
+

VectorCube

+
+
+
+

Warning

+

EXPERIMENTAL: not generally supported, API subject to change.

+
+
+

Added in version 0.10.0.

+
+
+

Changed in version 0.16.0: Added support to pass self-contained UDF instance.

+
+
+

See also

+

openeo.org documentation on process “run_udf”.

+
+
+ +
+
+save_result(format='GeoJSON', options=None)[source]
+
+

See also

+

openeo.org documentation on process “save_result”.

+
+
+ +
+
+send_job(out_format=None, *, title=None, description=None, plan=None, budget=None, additional=None, job_options=None, validate=None, auto_add_save_result=True, **format_options)
+
+
Return type:
+

BatchJob

+
+
+
+

Deprecated since version 0.10.0: Usage of this legacy method is deprecated. Use +create_job() instead.

+
+
+ +
+
+to_json(*, indent=2, separators=None)
+

Get interoperable JSON representation of the process graph.

+

See DataCube.print_json() to directly print the JSON representation +and Export a process graph for more usage information.

+

Also see json.dumps docs for more information on the JSON formatting options.

+
+
Parameters:
+
    +
  • indent (Optional[int]) – JSON indentation level.

  • +
  • separators (Optional[Tuple[str, str]]) – (optional) tuple of item/key separators.

  • +
+
+
Return type:
+

str

+
+
Returns:
+

JSON string

+
+
+
+ +
+
+vector_to_raster(target)[source]
+

Converts this vector cube (VectorCube) into a raster data cube (DataCube). +The bounding polygon of homogenous areas of pixels is constructed.

+
+
Parameters:
+

target (DataCube) – a reference raster data cube to adopt the CRS/projection/resolution from.

+
+
Return type:
+

DataCube

+
+
+
+

Warning

+

vector_to_raster is an experimental, non-standard process. It is not widely supported, and its API is subject to change.

+
+
+

Added in version 0.28.0.

+
+
+ +
+ +
+
+

openeo.rest.mlmodel

+
+
+class openeo.rest.mlmodel.MlModel(graph, connection)[source]
+

A machine learning model.

+

It is the result of a training procedure, e.g. output of a fit_... process, +and can be used for prediction (classification or regression) with the corresponding predict_... process.

+
+

Added in version 0.10.0.

+
+
+
+create_job(*, title=None, description=None, plan=None, budget=None, additional=None, job_options=None)[source]
+

Sends a job to the backend and returns a BatchJob instance.

+
+
Parameters:
+
    +
  • title (Optional[str]) – job title

  • +
  • description (Optional[str]) – job description

  • +
  • plan (Optional[str]) – The billing plan to process and charge the job with

  • +
  • budget (Optional[float]) – Maximum budget to be spent on executing the job. +Note that some backends do not honor this limit.

  • +
  • additional (Optional[dict]) – additional (top-level) properties to set in the request body

  • +
  • job_options (Optional[dict]) – dictionary of job options to pass to the backend +(under top-level property “job_options”)

  • +
  • format_options – String Parameters for the job result format

  • +
+
+
Return type:
+

BatchJob

+
+
Returns:
+

Created job.

+
+
+
+

Added in version 0.36.0: Added argument additional.

+
+
+ +
+
+execute_batch(outputfile, *, title=None, description=None, plan=None, budget=None, print=<built-in function print>, max_poll_interval=60, connection_retry_interval=30, additional=None, job_options=None)[source]
+

Evaluate the process graph by creating a batch job, and retrieving the results when it is finished. +This method is mostly recommended if the batch job is expected to run in a reasonable amount of time.

+

For very long running jobs, you probably do not want to keep the client running.

+
+
Parameters:
+
    +
  • job_options (Optional[dict])

  • +
  • outputfile (Union[str, Path]) – The path of a file to which a result can be written

  • +
  • out_format – (optional) Format of the job result.

  • +
  • format_options – String Parameters for the job result format

  • +
  • additional (Optional[dict]) – additional (top-level) properties to set in the request body

  • +
  • job_options – dictionary of job options to pass to the backend +(under top-level property “job_options”)

  • +
+
+
Return type:
+

BatchJob

+
+
+
+

Added in version 0.36.0: Added argument additional.

+
+
+ +
+
+flat_graph()
+

Get the process graph in internal flat dict representation. +Return type: Dict[str, dict]

+
+

Warning

+

This method is mainly intended for internal use. +It is not recommended for general use and is subject to change.

+

Instead, it is recommended to use +to_json() or print_json() +to obtain a standardized, interoperable JSON representation of the process graph. +See Export a process graph for more information.

+
+
+ +
+
+static load_ml_model(connection, id)[source]
+

Loads a machine learning model from a STAC Item.

+
+
Parameters:
+
    +
  • connection (Connection) – connection object

  • +
  • id (Union[str, BatchJob]) – STAC item reference, as URL, batch job (id) or user-uploaded file

  • +
+
+
Return type:
+

MlModel

+
+
Returns:
+

+
+
+
+

Added in version 0.10.0.

+
+
+

See also

+

openeo.org documentation on process “load_ml_model”.

+
+
+ +
+
+print_json(*, file=None, indent=2, separators=None, end='\\n')
+

Print interoperable JSON representation of the process graph.

+

See DataCube.to_json() to get the JSON representation as a string +and Export a process graph for more usage information.

+

Also see json.dumps docs for more information on the JSON formatting options.

+
+
Parameters:
+
    +
  • file – file-like object (stream) to print to (current sys.stdout by default). +Or a path (string or pathlib.Path) to a file to write to.

  • +
  • indent (Optional[int]) – JSON indentation level.

  • +
  • separators (Optional[Tuple[str, str]]) – (optional) tuple of item/key separators.

  • +
  • end (str) – additional string to be printed at the end (newline by default).

  • +
+
+
+
+

Added in version 0.12.0.

+
+
+

Added in version 0.23.0: added the end argument.

+
+
+ +
+
+result_node()
+

Get the current result node (PGNode) of the process graph. +Return type: PGNode

+
+

Added in version 0.10.1.

+
+
+ +
+
+save_ml_model(options=None)[source]
+

Saves a machine learning model as part of a batch job.

+
+
Parameters:
+

options (Optional[dict]) – Additional parameters to create the file(s).

+
+
+
+ +
+
+to_json(*, indent=2, separators=None)
+

Get interoperable JSON representation of the process graph.

+

See DataCube.print_json() to directly print the JSON representation +and Export a process graph for more usage information.

+

Also see json.dumps docs for more information on the JSON formatting options.

+
+
Parameters:
+
    +
  • indent (Optional[int]) – JSON indentation level.

  • +
  • separators (Optional[Tuple[str, str]]) – (optional) tuple of item/key separators.

  • +
+
+
Return type:
+

str

+
+
Returns:
+

JSON string

+
+
+
+ +
+ +
+
+

openeo.rest.multiresult

+
+
+class openeo.rest.multiresult.MultiResult(leaves, connection=None)[source]
+

Helper to create and run batch jobs with process graphs +that contain multiple result nodes +or, more generally speaking, multiple process graph “leaf” nodes.

+

Provide multiple +DataCube/VectorCube +instances to the constructor, +and start a batch job from that, +for example as follows:

+
from openeo import MultiResult
+
+cube1 = ...
+cube2 = ...
+multi_result = MultiResult([cube1, cube2])
+job = multi_result.create_job()
+
+
+ +
+

Added in version 0.35.0.

+
+
+
+__init__(leaves, connection=None)[source]
+

Build a MultiResult instance from multiple leaf nodes

+
+
Parameters:
+
    +
  • leaves (List[FlatGraphableMixin]) – list of objects that can be +converted to an openEO-style (flat) process graph representation, +typically DataCube +or VectorCube instances.

  • +
  • connection (Optional[Connection]) – Optional connection to use for creating/starting batch jobs, +for special use cases where the provided leaf instances +are not already associated with a connection.

  • +
+
+
+
+ +
+
+print_json(*, file=None, indent=2, separators=None, end='\\n')
+

Print interoperable JSON representation of the process graph.

+

See DataCube.to_json() to get the JSON representation as a string +and Export a process graph for more usage information.

+

Also see json.dumps docs for more information on the JSON formatting options.

+
+
Parameters:
+
    +
  • file – file-like object (stream) to print to (current sys.stdout by default). +Or a path (string or pathlib.Path) to a file to write to.

  • +
  • indent (Optional[int]) – JSON indentation level.

  • +
  • separators (Optional[Tuple[str, str]]) – (optional) tuple of item/key separators.

  • +
  • end (str) – additional string to be printed at the end (newline by default).

  • +
+
+
+
+

Added in version 0.12.0.

+
+
+

Added in version 0.23.0: added the end argument.

+
+
+ +
+
+to_json(*, indent=2, separators=None)
+

Get interoperable JSON representation of the process graph.

+

See DataCube.print_json() to directly print the JSON representation +and Export a process graph for more usage information.

+

Also see json.dumps docs for more information on the JSON formatting options.

+
+
Parameters:
+
    +
  • indent (Optional[int]) – JSON indentation level.

  • +
  • separators (Optional[Tuple[str, str]]) – (optional) tuple of item/key separators.

  • +
+
+
Return type:
+

str

+
+
Returns:
+

JSON string

+
+
+
+ +
+ +
+
+

openeo.metadata

+
+
+class openeo.metadata.BandDimension(name, bands)[source]
+
+
+append_band(band)[source]
+

Create new BandDimension with appended band.

+
+
Return type:
+

BandDimension

+
+
+
+ +
+
+band_index(band)[source]
+

Resolve a given band (common) name/index to band index

+
+
Parameters:
+

band (Union[int, str]) – band name, common name or index

+
+
Returns:
+

band index

+
+
Return type:
+

int

+
+
+
+ +
+
+band_name(band, allow_common=True)[source]
+

Resolve (common) name or index to a valid (common) name

+
+
Return type:
+

str

+
+
+
+ +
+
+filter_bands(bands)[source]
+

Construct new BandDimension with subset of bands, +based on given band indices or (common) names

+
+
Return type:
+

BandDimension

+
+
+
+ +
+
+rename(name)[source]
+

Create new dimension with new name.

+
+
Return type:
+

Dimension

+
+
+
+ +
+
+rename_labels(target, source)[source]
+

Rename labels, if the type of dimension allows it.

+
+
Parameters:
+
    +
  • target – List of target labels

  • +
  • source – Source labels, or empty list

  • +
+
+
Return type:
+

Dimension

+
+
Returns:
+

A new dimension with modified labels, or the same if no change is applied.

+
+
+
+ +
+ +
+
+class openeo.metadata.CollectionMetadata(metadata, dimensions=None)[source]
+

Wrapper for EO Data Collection metadata.

+

Simplifies getting values from deeply nested mappings, +allows additional parsing and normalizing compatibility issues.

+

Metadata is expected to follow format defined by +https://openeo.org/documentation/1.0/developers/api/reference.html#operation/describe-collection +(with partial support for older versions)

+
+ +
+
+class openeo.metadata.SpatialDimension(name, extent, crs=4326, step=None)[source]
+
+
+rename(name)[source]
+

Create new dimension with new name.

+
+
Return type:
+

Dimension

+
+
+
+ +
+ +
+
+class openeo.metadata.TemporalDimension(name, extent)[source]
+
+
+rename(name)[source]
+

Create new dimension with new name.

+
+
Return type:
+

Dimension

+
+
+
+ +
+
+rename_labels(target, source)[source]
+

Rename labels, if the type of dimension allows it.

+
+
Parameters:
+
    +
  • target – List of target labels

  • +
  • source – Source labels, or empty list

  • +
+
+
Return type:
+

Dimension

+
+
Returns:
+

A new dimension with modified labels, or the same if no change is applied.

+
+
+
+ +
+ +
+
+

openeo.api.process

+
+
+class openeo.api.process.Parameter(name, description=None, schema=None, default=<object object>, optional=None)[source]
+

A (process) parameter to build parameterized +user-defined processes.

+

Parameter objects can be defined +with at least a name and expected schema +(e.g. is the parameter a placeholder for a string, a bounding box, a date, …) +and can then be used +with various functions and classes, +like DataCube, +to build parameterized user-defined processes.

+

Apart from the generic Parameter constructor, +this class also provides various helpers (class methods) +to easily create parameters for common parameter types.

+
+
Parameters:
+
    +
  • name (str) – parameter name, which will be used to assign concrete values to. +It is recommended to stick to the convention of snake case naming (using lowercase with underscores).

  • +
  • description (Optional[str]) – human-readable description of the parameter.

  • +
  • schema (Union[list, dict, str, None]) – JSON schema describing the expected data type and structure of the parameter.

  • +
  • default – default value for the parameter when it’s optional.

  • +
  • optional (Optional[bool]) – toggle to indicate whether the parameter is optional or required.

  • +
+
+
+
+
+classmethod array(name, description=None, *, item_schema=None, **kwargs)[source]
+

Helper to easily create parameter with an ‘array’ schema.

+
+
Parameters:
+
    +
  • name (str) – parameter name, which will be used to assign concrete values to. +It is recommended to stick to the convention of snake case naming (using lowercase with underscores).

  • +
  • description (Optional[str]) – human-readable description of the parameter.

  • +
  • item_schema (Union[str, dict, None]) – Schema of the array items given in JSON Schema style, e.g. {"type": "string"}. +Simple schemas can also be specified as single string: +e.g. "string" will be expanded to {"type": "string"}.

  • +
+
+
Return type:
+

Parameter

+
+
+

See the generic Parameter constructor for information on additional arguments (except schema).

+
+

Changed in version 0.23.0: Added item_schema argument.

+
+
+ +
+
+classmethod boolean(name, description=None, **kwargs)[source]
+

Helper to easily create a ‘boolean’ parameter.

+
+
Parameters:
+
    +
  • name (str) – parameter name, which will be used to assign concrete values to. +It is recommended to stick to the convention of snake case naming (using lowercase with underscores).

  • +
  • description (Optional[str]) – human-readable description of the parameter.

  • +
+
+
Return type:
+

Parameter

+
+
+

See the generic Parameter constructor for information on additional arguments (except schema).

+
+ +
+
+classmethod bounding_box(name, description="Spatial extent specified as a bounding box with 'west', 'south', 'east' and 'north' fields.", **kwargs)[source]
+

Helper to easily create a ‘bounding box’ parameter, which allows specifying a spatial extent +with “west”, “south”, “east” and “north” bounds (and optionally a CRS identifier).

+
+
Parameters:
+
    +
  • name (str) – parameter name, which will be used to assign concrete values to. +It is recommended to stick to the convention of snake case naming (using lowercase with underscores).

  • +
  • description (str) – human-readable description of the parameter.

  • +
+
+
Return type:
+

Parameter

+
+
+

See the generic Parameter constructor for information on additional arguments (except schema).

+
+

Added in version 0.30.0.

+
+
+ +
+
+classmethod datacube(name='data', description='A data cube.', **kwargs)[source]
+

Helper to easily create a ‘datacube’ parameter.

+
+
Parameters:
+
    +
  • name (str) – parameter name, which will be used to assign concrete values to. +It is recommended to stick to the convention of snake case naming (using lowercase with underscores).

  • +
  • description (str) – human-readable description of the parameter.

  • +
+
+
Return type:
+

Parameter

+
+
+

See the generic Parameter constructor for information on additional arguments (except schema).

+
+

Added in version 0.22.0.

+
+
+ +
+
+classmethod date(name, description='A date.', **kwargs)[source]
+

Helper to easily create a ‘date’ parameter.

+
+
Parameters:
+
    +
  • name (str) – parameter name, which will be used to assign concrete values to. +It is recommended to stick to the convention of snake case naming (using lowercase with underscores).

  • +
  • description (str) – human-readable description of the parameter.

  • +
+
+
Return type:
+

Parameter

+
+
+

See the generic Parameter constructor for information on additional arguments (except schema).

+
+

Added in version 0.30.0.

+
+
+ +
+
+classmethod date_time(name, description='A date with time.', **kwargs)[source]
+

Helper to easily create a ‘date-time’ parameter.

+
+
Parameters:
+
    +
  • name (str) – parameter name, which will be used to assign concrete values to. +It is recommended to stick to the convention of snake case naming (using lowercase with underscores).

  • +
  • description (str) – human-readable description of the parameter.

  • +
+
+
Return type:
+

Parameter

+
+
+

See the generic Parameter constructor for information on additional arguments (except schema).

+
+

Added in version 0.30.0.

+
+
+ +
+
+classmethod geojson(name, description='Geometries specified as GeoJSON object.', **kwargs)[source]
+

Helper to easily create a ‘geojson’ parameter, which allows specifying geometries as an inline GeoJSON object.

+
+
Parameters:
+
    +
  • name (str) – parameter name, which will be used to assign concrete values to. +It is recommended to stick to the convention of snake case naming (using lowercase with underscores).

  • +
  • description (str) – human-readable description of the parameter.

  • +
+
+
Return type:
+

Parameter

+
+
+

See the generic Parameter constructor for information on additional arguments (except schema).

+
+

Added in version 0.30.0.

+
+
+ +
+
+classmethod integer(name, description=None, **kwargs)[source]
+

Helper to create an ‘integer’ parameter.

+
+
Parameters:
+
    +
  • name (str) – parameter name, which will be used to assign concrete values to. +It is recommended to stick to the convention of snake case naming (using lowercase with underscores).

  • +
  • description (Optional[str]) – human-readable description of the parameter.

  • +
+
+
Return type:
+

Parameter

+
+
+

See the generic Parameter constructor for information on additional arguments (except schema).

+
+ +
+
+classmethod number(name, description=None, **kwargs)[source]
+

Helper to easily create a ‘number’ parameter.

+
+
Parameters:
+
    +
  • name (str) – parameter name, which will be used to assign concrete values to. +It is recommended to stick to the convention of snake case naming (using lowercase with underscores).

  • +
  • description (Optional[str]) – human-readable description of the parameter.

  • +
+
+
Return type:
+

Parameter

+
+
+

See the generic Parameter constructor for information on additional arguments (except schema).

+
+ +
+
+classmethod object(name, description=None, *, subtype=None, **kwargs)[source]
+

Helper to create an ‘object’ type parameter

+
+
Parameters:
+
    +
  • name (str) – parameter name, which will be used to assign concrete values to. +It is recommended to stick to the convention of snake case naming (using lowercase with underscores).

  • +
  • description (Optional[str]) – human-readable description of the parameter.

  • +
  • subtype (Optional[str]) – subtype of the ‘object’ schema

  • +
+
+
Return type:
+

Parameter

+
+
+

See the generic Parameter constructor for information on additional arguments (except schema).

+
+

Added in version 0.26.0.

+
+
+ +
+
+classmethod raster_cube(name='data', description='A data cube.', **kwargs)[source]
+

Helper to easily create a ‘raster-cube’ parameter.

+
+
Parameters:
+
    +
  • name (str) – parameter name, which will be used to assign concrete values to. +It is recommended to stick to the convention of snake case naming (using lowercase with underscores).

  • +
  • description (str) – human-readable description of the parameter.

  • +
+
+
Return type:
+

Parameter

+
+
+

See the generic Parameter constructor for information on additional arguments (except schema).

+
+ +
+
+classmethod spatial_extent(name='spatial_extent', description=None, **kwargs)[source]
+

Helper to easily create a ‘spatial_extent’ parameter, which is compatible with the load_collection argument of +the same name. This makes it convenient to create user-defined processes that can be applied to a bounding box and vector data +for spatial filtering. It is also possible for users to set it to null, and define spatial filtering using other processes.

+
+
Parameters:
+
    +
  • name (str) – parameter name, which will be used to assign concrete values to. +It is recommended to stick to the convention of snake case naming (using lowercase with underscores).

  • +
  • description (Optional[str]) – human-readable description of the parameter.

  • +
+
+
Return type:
+

Parameter

+
+
+

See the generic Parameter constructor for information on additional arguments (except schema).

+
+

Added in version 0.32.0.

+
+
+ +
+
+classmethod string(name, description=None, *, values=None, subtype=None, format=None, **kwargs)[source]
+

Helper to easily create a ‘string’ parameter.

+
+
Parameters:
+
    +
  • name (str) – parameter name, which will be used to assign concrete values to. +It is recommended to stick to the convention of snake case naming (using lowercase with underscores).

  • +
  • description (Optional[str]) – human-readable description of the parameter.

  • +
  • values (Optional[List[str]]) – Optional list of allowed string values to make this an “enum”.

  • +
  • subtype (Optional[str]) – Optional subtype of the ‘string’ schema.

  • +
  • format (Optional[str]) – Optional format of the ‘string’ schema.

  • +
+
+
Return type:
+

Parameter

+
+
+

See the generic Parameter constructor for information on additional arguments (except schema).

+
+ +
+
+classmethod temporal_interval(name='temporal_extent', description='Temporal extent specified as two-element array with start and end date/date-time.', **kwargs)[source]
+

Helper to easily create a ‘temporal-interval’ parameter, which allows to specify a temporal extent +as a two-element array with start and end date/date-time.

+
+
Parameters:
+
    +
  • name (str) – parameter name, which will be used to assign concrete values to. +It is recommended to stick to the convention of snake case naming (using lowercase with underscores).

  • +
  • description (str) – human-readable description of the parameter.

  • +
+
+
Return type:
+

Parameter

+
+
+

See the generic Parameter constructor for information on additional arguments (except schema).

+
+

Added in version 0.30.0.

+
+
+ +
+
+to_dict()[source]
+

Convert to dictionary for JSON-serialization.

+
+
Return type:
+

dict

+
+
+
+ +
+ +
+
+

openeo.api.logs

+
+
+class openeo.api.logs.LogEntry(*args, **kwargs)[source]
+

Log message and info for jobs and services

+
+
Fields:
    +
  • id: Unique ID for the log, string, REQUIRED

  • +
  • code: Error code, string, optional

  • +
  • level: Severity level, string (error, warning, info or debug), REQUIRED

  • +
  • message: Error message, string, REQUIRED

  • +
  • time: Date and time of the error event as RFC3339 date-time, string, available since API 1.1.0

  • +
  • path: A “stack trace” for the process, array of dicts

  • +
  • links: Related links, array of dicts

  • +
  • usage: Usage metrics available as property ‘usage’, dict, available since API 1.1.0 +May contain the following metrics: cpu, memory, duration, network, disk, storage and other custom ones +Each of the metrics is also a dict with the following parts: value (numeric) and unit (string)

  • +
  • data: Arbitrary data the user wants to “log” for debugging purposes. +Please note that this property may not exist as there’s a difference +between None and non-existing. None for example refers to no-data in +many cases while the absence of the property means that the user did +not provide any data for debugging.

  • +
+
+
+
+ +
+
+openeo.api.logs.normalize_log_level(log_level, default=10)[source]
+

Helper function to convert an openEO API log level (e.g. string “error”) +to the integer constants defined in Python’s standard library logging module (e.g. logging.ERROR).

+
+
Parameters:
+
    +
  • log_level (Union[int, str, None]) – log level to normalize: a log level string in the style of +the openEO API (“error”, “warning”, “info”, or “debug”), +an integer value (e.g. a logging constant), or None.

  • +
  • default (int) – fallback log level to return on unknown log level strings or None input.

  • +
+
+
Raises:
+

TypeError – when log_level is of any type other than str, int or None.

+
+
Return type:
+

int

+
+
Returns:
+

One of the following log level constants from the standard module logging: +logging.ERROR, logging.WARNING, logging.INFO, or logging.DEBUG.

+
+
+
+ +
+
+

openeo.rest.connection

+

This module provides a Connection object to manage and persist settings when interacting with the OpenEO API.

+
+
+class openeo.rest.connection.Connection(url, *, session=None, default_timeout=None, auto_validate=True, slow_response_threshold=None, auth_config=None, refresh_token_store=None, oidc_auth_renewer=None, auth=None)[source]
+

Connection to an openEO backend.

+
+
Parameters:
+
    +
  • url (str) – Backend root url

  • +
  • session (Optional[Session]) – Optional requests.Session object to use for requests.

  • +
  • default_timeout (Optional[int]) – Default timeout for requests in seconds.

  • +
  • auto_validate (bool) – toggle to automatically validate process graphs before execution

  • +
  • slow_response_threshold (Optional[float]) – Optional threshold in seconds +to consider a response as slow and log a warning.

  • +
  • auth_config (Optional[AuthConfig]) – Optional AuthConfig object +to fetch authentication related configuration from.

  • +
  • refresh_token_store (Optional[RefreshTokenStore]) – For advanced usage: +custom RefreshTokenStore object +to use for storing/loading refresh tokens.

  • +
  • oidc_auth_renewer (Optional[OidcAuthenticator]) – For advanced usage: +optional OidcAuthenticator object to use for renewing OIDC tokens.

  • +
  • auth (Optional[AuthBase]) – Optional requests.auth.AuthBase object to use for requests. +Usage of this parameter is deprecated, use the specific authentication methods instead.

  • +
+
+
+
+
+as_curl(data, *, path='/result', method='POST', obfuscate_auth=False, additional=None, job_options=None)[source]
+

Build curl command to evaluate given process graph or data cube +(including authorization and content-type headers).

+
>>> print(connection.as_curl(cube))
+curl -i -X POST -H 'Content-Type: application/json' -H 'Authorization: Bearer ...' \
+    --data '{"process":{"process_graph":{...}}' \
+    https://openeo.example/openeo/1.1/result
+
+
+
+
Parameters:
+
    +
  • data (Union[dict, DataCube, FlatGraphableMixin]) – something that is convertible to an openEO process graph: a dictionary, +a DataCube object, +a ProcessBuilder, …

  • +
  • path – endpoint to send request to: typically "/result" (default) for synchronous requests +or "/jobs" for batch jobs

  • +
  • method – HTTP method to use (typically "POST")

  • +
  • obfuscate_auth (bool) – don’t show actual bearer token

  • +
  • additional (Optional[dict]) – additional (top-level) properties to set in the request body

  • +
  • job_options (Optional[dict]) – dictionary of job options to pass to the backend +(under top-level property “job_options”)

  • +
+
+
Return type:
+

str

+
+
Returns:
+

curl command as a string

+
+
+
+

Added in version 0.36.0: Added arguments additional and job_options.

+
+
+ +
+
+assert_user_defined_process_support()[source]
+

Capabilities document based verification that back-end supports user-defined processes.

+
+

Added in version 0.23.0.

+
+
+ +
+
+authenticate_basic(username=None, password=None)[source]
+

Authenticate a user to the backend using basic username and password.

+
+
Parameters:
+
    +
  • username (Optional[str]) – User name

  • +
  • password (Optional[str]) – User passphrase

  • +
+
+
Return type:
+

Connection

+
+
+
+ +
+
+authenticate_oidc(provider_id=None, client_id=None, client_secret=None, *, store_refresh_token=True, use_pkce=None, display=<built-in function print>, max_poll_time=300)[source]
+

Generic method to do OpenID Connect authentication.

+

In the context of interactive usage, this method first tries to use refresh tokens +and falls back on device code flow.

+

For non-interactive, machine-to-machine contexts, it is also possible to trigger +the usage of the “client_credentials” flow through environment variables. +Assuming you have set up an OIDC client (with a secret): +set OPENEO_AUTH_METHOD to client_credentials, +set OPENEO_AUTH_CLIENT_ID to the client id, +and set OPENEO_AUTH_CLIENT_SECRET to the client secret.

+

See OIDC Authentication: Dynamic Method Selection for more details.

+
+
Parameters:
+
    +
  • provider_id (Optional[str]) – provider id to use

  • +
  • client_id (Optional[str]) – client id to use

  • +
  • client_secret (Optional[str]) – client secret to use

  • +
  • max_poll_time (float) – maximum time in seconds to keep polling for successful authentication.

  • +
+
+
+
+

Added in version 0.6.0.

+
+
+

Changed in version 0.17.0: Add max_poll_time argument

+
+
+

Changed in version 0.18.0: Add support for client credentials flow.

+
+
+ +
+
+authenticate_oidc_access_token(access_token, provider_id=None)[source]
+

Set up authorization headers directly with an OIDC access token.

+

Connection provides multiple methods to handle various OIDC authentication flows end-to-end. +If you already obtained a valid OIDC access token in another “out-of-band” way, you can use this method to +set up the authorization headers appropriately.

+
+
Parameters:
+
    +
  • access_token (str) – OIDC access token

  • +
  • provider_id (Optional[str]) – id of the OIDC provider as listed by the openEO backend (/credentials/oidc). +If not specified, the first (default) OIDC provider will be used.

  • +
  • skip_verification – Skip client-side verification of the provider_id +against the backend’s list of providers and related OIDC configuration

  • +
+
+
Return type:
+

Connection

+
+
+
+

Added in version 0.31.0.

+
+
+

Changed in version 0.33.0: Return connection object to support chaining.

+
+
+ +
+
+authenticate_oidc_authorization_code(client_id=None, client_secret=None, provider_id=None, timeout=None, server_address=None, webbrowser_open=None, store_refresh_token=False)[source]
+

OpenID Connect Authorization Code Flow (with PKCE). +Return type: Connection

+
+

Deprecated since version 0.19.0: Usage of the Authorization Code flow is deprecated (because of its complexity) and will be removed. +It is recommended to use the Device Code flow with authenticate_oidc_device() +or Client Credentials flow with authenticate_oidc_client_credentials().

+
+
+ +
+
+authenticate_oidc_client_credentials(client_id=None, client_secret=None, provider_id=None)[source]
+

Authenticate with OIDC Client Credentials flow

+

Client id, secret and provider id can be specified directly through the available arguments. +It is also possible to leave these arguments empty and specify them through +environment variables OPENEO_AUTH_CLIENT_ID, +OPENEO_AUTH_CLIENT_SECRET and OPENEO_AUTH_PROVIDER_ID respectively +as discussed in OIDC Client Credentials Using Environment Variables.

+
+
Parameters:
+
    +
  • client_id (Optional[str]) – client id to use

  • +
  • client_secret (Optional[str]) – client secret to use

  • +
  • provider_id (Optional[str]) – provider id to use +Fallback value can be set through environment variable OPENEO_AUTH_PROVIDER_ID.

  • +
+
+
Return type:
+

Connection

+
+
+
+

Changed in version 0.18.0: Allow specifying client id, secret and provider id through environment variables.

+
+
+ +
+
+authenticate_oidc_device(client_id=None, client_secret=None, provider_id=None, *, store_refresh_token=False, use_pkce=None, max_poll_time=300, **kwargs)[source]
+

Authenticate with the OIDC Device Code flow

+
+
Parameters:
+
    +
  • client_id (Optional[str]) – client id to use instead of the default one

  • +
  • client_secret (Optional[str]) – client secret to use instead of the default one

  • +
  • provider_id (Optional[str]) – provider id to use. +Fallback value can be set through environment variable OPENEO_AUTH_PROVIDER_ID.

  • +
  • store_refresh_token (bool) – whether to store the received refresh token automatically

  • +
  • use_pkce (Optional[bool]) – Use PKCE instead of client secret. +If not set explicitly to True (use PKCE) or False (use client secret), +it will be attempted to detect the best mode automatically. +Note that PKCE for device code is not widely supported among OIDC providers.

  • +
  • max_poll_time (float) – maximum time in seconds to keep polling for successful authentication.

  • +
+
+
Return type:
+

Connection

+
+
+
+

Changed in version 0.5.1: Add use_pkce argument

+
+
+

Changed in version 0.17.0: Add max_poll_time argument

+
+
+

Changed in version 0.19.0: Support fallback provider id through environment variable OPENEO_AUTH_PROVIDER_ID.

+
+
+ +
+
+authenticate_oidc_refresh_token(client_id=None, refresh_token=None, client_secret=None, provider_id=None, *, store_refresh_token=False)[source]
+

Authenticate with OIDC Refresh Token flow

+
+
Parameters:
+
    +
  • client_id (Optional[str]) – client id to use

  • +
  • refresh_token (Optional[str]) – refresh token to use

  • +
  • client_secret (Optional[str]) – client secret to use

  • +
  • provider_id (Optional[str]) – provider id to use. +Fallback value can be set through environment variable OPENEO_AUTH_PROVIDER_ID.

  • +
  • store_refresh_token (bool) – whether to store the received refresh token automatically

  • +
+
+
Return type:
+

Connection

+
+
+
+

Changed in version 0.19.0: Support fallback provider id through environment variable OPENEO_AUTH_PROVIDER_ID.

+
+
+ +
+
+authenticate_oidc_resource_owner_password_credentials(username, password, client_id=None, client_secret=None, provider_id=None, store_refresh_token=False)[source]
+

OpenID Connect Resource Owner Password Credentials

+
+
Return type:
+

Connection

+
+
+
+ +
+
+capabilities()[source]
+

Loads all available capabilities.

+
+
Return type:
+

RESTCapabilities

+
+
+
+ +
+
+collection_items(name, spatial_extent=None, temporal_extent=None, limit=None)[source]
+

Loads items for a specific image collection. +May not be available for all collections.

+

This is an experimental API and is subject to change.

+
+
Parameters:
+
    +
  • name – String Id of the collection

  • +
  • spatial_extent (Optional[List[float]]) – Limits the items to the given bounding box in WGS84: +1. Lower left corner, coordinate axis 1 +2. Lower left corner, coordinate axis 2 +3. Upper right corner, coordinate axis 1 +4. Upper right corner, coordinate axis 2

  • +
  • temporal_extent (Optional[List[Union[str, datetime]]]) – Limits the items to the specified temporal interval. +The interval has to be specified as an array with exactly two elements (start, end). +Also supports open intervals by setting one of the boundaries to None, but never both.

  • +
  • limit (Optional[int]) – The amount of items per request/page. If None, the back-end decides.

  • +
+
+
Return type:
+

Iterator[dict]

+
+
Returns:
+

An iterator over the items (one dict per item)

+
+
+
+ +
+
+create_job(process_graph, *, title=None, description=None, plan=None, budget=None, additional=None, job_options=None, validate=None)[source]
+

Create a new job from given process graph on the back-end.

+
+
Parameters:
+
    +
  • process_graph (Union[dict, FlatGraphableMixin, str, Path, List[FlatGraphableMixin]]) – openEO-style (flat) process graph representation, +or an object that can be converted to such a representation: +a dictionary, a DataCube object, +a string with a JSON representation, +a local file path or URL to a JSON representation, +a MultiResult object, …

  • +
  • title (Optional[str]) – job title

  • +
  • description (Optional[str]) – job description

  • +
  • plan (Optional[str]) – The billing plan to process and charge the job with

  • +
  • budget (Optional[float]) – Maximum budget to be spent on executing the job. +Note that some backends do not honor this limit.

  • +
  • additional (Optional[dict]) – additional (top-level) properties to set in the request body

  • +
  • job_options (Optional[dict]) – dictionary of job options to pass to the backend +(under top-level property “job_options”)

  • +
  • validate (Optional[bool]) – Optional toggle to enable/prevent validation of the process graphs before execution +(overruling the connection’s auto_validate setting).

  • +
+
+
Return type:
+

BatchJob

+
+
Returns:
+

Created job

+
+
+
+

Changed in version 0.35.0: Add multi-result support.

+
+
+

Added in version 0.36.0: Added argument job_options.

+
+
+ +
+
+datacube_from_flat_graph(flat_graph, parameters=None)[source]
+

Construct a DataCube from a flat dictionary representation of a process graph.

+ +
+
Parameters:
+
    +
  • flat_graph (dict) – flat dictionary representation of a process graph +or a process dictionary with such a flat process graph under a “process_graph” field +(and optionally parameter metadata under a “parameters” field).

  • +
  • parameters (Optional[dict]) – Optional dictionary mapping parameter names to parameter values +to use for parameters occurring in the process graph (e.g. as used in user-defined processes)

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

A DataCube corresponding with the operations encoded in the process graph

+
+
+
+ +
+
+datacube_from_json(src, parameters=None)[source]
+

Construct a DataCube from JSON resource containing (flat) process graph representation.

+ +
+
Parameters:
+
    +
  • src (Union[str, Path]) – raw JSON string, URL to JSON resource or path to local JSON file

  • +
  • parameters (Optional[dict]) – Optional dictionary mapping parameter names to parameter values +to use for parameters occurring in the process graph (e.g. as used in user-defined processes)

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

A DataCube corresponding with the operations encoded in the process graph

+
+
+
+ +
+
+datacube_from_process(process_id, namespace=None, **kwargs)[source]
+

Load a data cube from a (custom) process.

+
+
Parameters:
+
    +
  • process_id (str) – The process id.

  • +
  • namespace (Optional[str]) – optional: process namespace

  • +
  • kwargs – The arguments of the custom process

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

A DataCube, without valid metadata, as the client is not aware of this custom process.

+
+
+
+ +
+
+describe_account()[source]
+

Describes the currently authenticated user account.

+
+
Return type:
+

dict

+
+
+
+ +
+
+describe_collection(collection_id)[source]
+

Get full collection metadata for given collection id.

+
+

See also

+

list_collection_ids() +to list all collection ids provided by the back-end.

+
+
+
Parameters:
+

collection_id (str) – collection id

+
+
Return type:
+

dict

+
+
Returns:
+

collection metadata.

+
+
+
+ +
+
+describe_process(id, namespace=None)[source]
+

Returns a single process from the back end.

+
+
Parameters:
+
    +
  • id (str) – The id of the process.

  • +
  • namespace (Optional[str]) – The namespace of the process.

  • +
+
+
Return type:
+

dict

+
+
Returns:
+

The process definition.

+
+
+
+ +
+
+download(graph, outputfile=None, *, timeout=None, validate=None, chunk_size=10000000, additional=None, job_options=None)[source]
+

Downloads the result of a process graph synchronously, +and saves the result to the given file or returns a bytes object if no outputfile is specified. +This method is useful to export binary content such as images. For JSON content, the execute method is recommended.

+
+
Parameters:
+
    +
  • graph (Union[dict, FlatGraphableMixin, str, Path, List[FlatGraphableMixin]]) – (flat) dict representing a process graph, or process graph as raw JSON string, +or as local file path or URL

  • +
  • outputfile (Union[Path, str, None]) – output file

  • +
  • timeout (Optional[int]) – timeout to wait for response

  • +
  • validate (Optional[bool]) – Optional toggle to enable/prevent validation of the process graphs before execution +(overruling the connection’s auto_validate setting).

  • +
  • chunk_size (int) – chunk size for streaming response.

  • +
  • additional (Optional[dict]) – additional (top-level) properties to set in the request body

  • +
  • job_options (Optional[dict]) – dictionary of job options to pass to the backend +(under top-level property “job_options”)

  • +
+
+
Return type:
+

Optional[bytes]

+
+
+
+

Added in version 0.36.0: Added arguments additional and job_options.

+
+
+ +
+
+execute(process_graph, *, timeout=None, validate=None, auto_decode=True, additional=None, job_options=None)[source]
+

Execute a process graph synchronously and return the result. If the result is a JSON object, it will be parsed.

+
+
Parameters:
+
    +
  • process_graph (Union[dict, FlatGraphableMixin, str, Path, List[FlatGraphableMixin]]) – (flat) dict representing a process graph, or process graph as raw JSON string, +or as local file path or URL

  • +
  • validate (Optional[bool]) – Optional toggle to enable/prevent validation of the process graphs before execution +(overruling the connection’s auto_validate setting).

  • +
  • auto_decode (bool) – Boolean flag to enable/disable automatic JSON decoding of the response. Defaults to True.

  • +
  • additional (Optional[dict]) – additional (top-level) properties to set in the request body

  • +
  • job_options (Optional[dict]) – dictionary of job options to pass to the backend +(under top-level property “job_options”)

  • +
+
+
Return type:
+

Union[dict, Response]

+
+
Returns:
+

parsed JSON response as a dict if auto_decode is True, otherwise response object

+
+
+
+

Added in version 0.36.0: Added arguments additional and job_options.

+
+
+ +
+
+get_file(path, metadata=None)[source]
+

Gets a handle to a user-uploaded file in the user workspace on the back-end.

+
+
Parameters:
+

path (Union[str, PurePosixPath]) – The path on the user workspace.

+
+
Return type:
+

UserFile

+
+
+
+ +
+
+imagecollection(collection_id, spatial_extent=None, temporal_extent=None, bands=None, properties=None, max_cloud_cover=None, fetch_metadata=True)
+
+
Return type:
+

DataCube

+
+
+
+

Deprecated since version 0.4.10: Usage of this legacy method is deprecated. Use +load_collection() instead.

+
+
+ +
+
+job(job_id)[source]
+

Get the job based on the id. The job with the given id should already exist.

+

Use openeo.rest.connection.Connection.create_job() to create new jobs

+
+
Parameters:
+

job_id (str) – the job id of an existing job

+
+
Return type:
+

BatchJob

+
+
Returns:
+

A job object.

+
+
+
+ +
+
+job_logs(job_id, offset)[source]
+

Get batch job logs. +Return type: list

+
+

Deprecated since version 0.4.10: Use openeo.rest.job.BatchJob.logs() instead.

+
+
+ +
+
+job_results(job_id)[source]
+

Get batch job results metadata. +Return type: dict

+
+

Deprecated since version 0.4.10: Use openeo.rest.job.BatchJob.get_results() instead.

+
+
+ +
+
+list_collection_ids()[source]
+

List all collection ids provided by the back-end.

+
+

See also

+

describe_collection() +to get the metadata of a particular collection.

+
+
+
Return type:
+

List[str]

+
+
Returns:
+

list of collection ids

+
+
+
+ +
+
+list_collections()[source]
+

List basic metadata of all collections provided by the back-end.

+
+

Caution

+

Only the basic collection metadata will be returned. +To obtain full metadata of a particular collection, +it is recommended to use describe_collection() instead.

+
+
+
Return type:
+

List[dict]

+
+
Returns:
+

list of dictionaries with basic collection metadata.

+
+
+
+ +
+
+list_file_formats()[source]
+

Get available input and output formats

+
+
Return type:
+

dict

+
+
+
+ +
+
+list_file_types()
+
+
Return type:
+

dict

+
+
+
+

Deprecated since version 0.4.6: Usage of this legacy method is deprecated. Use +list_output_formats() instead.

+
+
+ +
+
+list_files()[source]
+

Lists all user-uploaded files in the user workspace on the back-end.

+
+
Return type:
+

List[UserFile]

+
+
Returns:
+

List of the user-uploaded files.

+
+
+
+ +
+
+list_jobs(limit=None)[source]
+

Lists all jobs of the authenticated user.

+
+
Parameters:
+

limit (Optional[int]) – maximum number of jobs to return. Setting this limit enables pagination.

+
+
Return type:
+

List[dict]

+
+
Returns:
+

job_list: List of all jobs of the user (one dict per job).

+
+
+
+

Added in version 0.36.0: Added limit argument

+
+
+ +
+
+list_processes(namespace=None)[source]
+

Loads all available processes of the back end.

+
+
Parameters:
+

namespace (Optional[str]) – The namespace for which to list processes.

+
+
Return type:
+

List[dict]

+
+
Returns:
+

List of all available processes of the back end (one dict per process).

+
+
+
+ +
+
+list_service_types()[source]
+

Loads all available service types.

+
+
Return type:
+

dict

+
+
Returns:
+

data_dict: Dict All available service types

+
+
+
+ +
+
+list_services()[source]
+

Loads all available services of the authenticated user.

+
+
Return type:
+

dict

+
+
Returns:
+

data_dict: Dict All available services

+
+
+
+ +
+
+list_udf_runtimes()[source]
+

List information about the available UDF runtimes.

+
+
Return type:
+

dict

+
+
Returns:
+

A dictionary with metadata about each available UDF runtime.

+
+
+
+ +
+
+list_user_defined_processes()[source]
+

Lists all user-defined processes of the authenticated user.

+
+
Return type:
+

List[dict]

+
+
+
+ +
+
+load_collection(collection_id, spatial_extent=None, temporal_extent=None, bands=None, properties=None, max_cloud_cover=None, fetch_metadata=True)[source]
+

Load a DataCube by collection id.

+
+
Parameters:
+
    +
  • collection_id (Union[str, Parameter]) – image collection identifier

  • +
  • spatial_extent (Union[Dict[str, float], Parameter, None]) – limit data to specified bounding box or polygons

  • +
  • temporal_extent (Union[Sequence[Union[str, date, Parameter, PGNode, ProcessBuilderBase, None]], Parameter, str, None]) – limit data to specified temporal interval. +Typically, just a two-item list or tuple containing start and end date. +See Filter on temporal extent for more details on temporal extent handling and shorthand notation.

  • +
  • bands (Union[None, List[str], Parameter]) – only add the specified bands.

  • +
  • properties (Union[None, Dict[str, Union[str, PGNode, Callable]], List[CollectionProperty], CollectionProperty]) – limit data by collection metadata property predicates. +See collection_property() for easy construction of such predicates.

  • +
  • max_cloud_cover (Optional[float]) – shortcut to set maximum cloud cover (“eo:cloud_cover” collection property)

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

a datacube containing the requested data

+
+
+
+

Added in version 0.13.0: added the max_cloud_cover argument.

+
+
+

Changed in version 0.23.0: Argument temporal_extent: add support for year/month shorthand notation +as discussed at Year/month shorthand notation.

+
+
+

Changed in version 0.26.0: Add collection_property() support to properties argument.

+
+
+

See also

+

openeo.org documentation on process “load_collection”.

+
+
+ +
+
+load_disk_collection(format, glob_pattern, options=None)[source]
+

Loads image data from disk as a DataCube.

+

This is backed by a non-standard process (‘load_disk_data’). This will eventually be replaced by standard options such as +openeo.rest.connection.Connection.load_stac() or https://processes.openeo.org/#load_uploaded_files

+
+
Parameters:
+
    +
  • format (str) – the file format, e.g. ‘GTiff’

  • +
  • glob_pattern (str) – a glob pattern that matches the files to load from disk

  • +
  • options (Optional[dict]) – options specific to the file format

  • +
+
+
Return type:
+

DataCube

+
+
+
+

Deprecated since version 0.25.0: Depends on non-standard process, replace with +openeo.rest.connection.Connection.load_stac() where +possible.

+
+
+ +
+
+load_geojson(data, properties=None)[source]
+

Converts GeoJSON data as defined by RFC 7946 into a vector data cube.

+
+
Parameters:
+
    +
  • data (Union[dict, str, Path, BaseGeometry, Parameter]) –

    the geometry to load. One of:

    +
      +
    • GeoJSON-style data structure: e.g. a dictionary with "type": "Polygon" and "coordinates" fields

    • +
    • a path to a local GeoJSON file

    • +
    • a GeoJSON string

    • +
    • a shapely geometry object

    • +
    +

  • +
  • properties (Optional[List[str]]) – A list of properties from the GeoJSON file to construct an additional dimension from.

  • +
+
+
Returns:
+

new VectorCube instance

+
+
+
+

Warning

+

EXPERIMENTAL: this process is experimental with the potential for major things to change.

+
+
+

Added in version 0.22.0.

+
+
+

See also

+

openeo.org documentation on process “load_geojson”.

+
+
+ +
+
+load_ml_model(id)[source]
+

Loads a machine learning model from a STAC Item.

+
+
Parameters:
+

id (Union[str, BatchJob]) – STAC item reference, as URL, batch job (id) or user-uploaded file

+
+
Return type:
+

MlModel

+
+
Returns:
+

+
+
+
+

Added in version 0.10.0.

+
+
+ +
+
+load_result(id, spatial_extent=None, temporal_extent=None, bands=None)[source]
+

Loads batch job results by job id from the server-side user workspace. +The job must have been stored by the authenticated user on the back-end currently connected to.

+
+
Parameters:
+
    +
  • id (str) – The id of a batch job with results.

  • +
  • spatial_extent (Optional[Dict[str, float]]) – limit data to specified bounding box or polygons

  • +
  • temporal_extent (Union[Sequence[Union[str, date, Parameter, PGNode, ProcessBuilderBase, None]], Parameter, str, None]) – limit data to specified temporal interval. +Typically, just a two-item list or tuple containing start and end date. +See Filter on temporal extent for more details on temporal extent handling and shorthand notation.

  • +
  • bands (Optional[List[str]]) – only add the specified bands

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

a DataCube

+
+
+
+

Changed in version 0.23.0: Argument temporal_extent: add support for year/month shorthand notation +as discussed at Year/month shorthand notation.

+
+
+

See also

+

openeo.org documentation on process “load_result”.

+
+
+ +
+
+load_stac(url, spatial_extent=None, temporal_extent=None, bands=None, properties=None)[source]
+

Loads data from a static STAC catalog or a STAC API Collection and returns the data as a processable DataCube. +A batch job result can be loaded by providing a reference to it.

+

If supported by the underlying metadata and file format, the data that is added to the data cube can be +restricted with the parameters spatial_extent, temporal_extent and bands. +If no data is available for the given extents, a NoDataAvailable error is thrown.

+

Remarks:

+
    +
  • The bands (and all dimensions that specify nominal dimension labels) are expected to be ordered as +specified in the metadata if the bands parameter is set to null.

  • +
  • If no additional parameter is specified this would imply that the whole data set is expected to be loaded. +Due to the large size of many data sets, this is not recommended and may be optimized by back-ends to only +load the data that is actually required after evaluating subsequent processes such as filters. +This means that the values should be processed only after the data has been limited to the required extent +and as a consequence also to a manageable size.

  • +
+
+
Parameters:
+
    +
  • url (str) –

    The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) +or a specific STAC API Collection that allows filtering items and downloading assets. +This includes batch job results, which themselves are compliant with STAC. +For external URLs, authentication details such as API keys or tokens may need to be included in the URL.

    +

    Batch job results can be specified in two ways:

    +
      +
    • For Batch job results at the same back-end, a URL pointing to the corresponding batch job results +endpoint should be provided. The URL usually ends with /jobs/{id}/results and {id} +is the corresponding batch job ID.

    • +
    • For external results, a signed URL must be provided. Not all back-ends support signed URLs, +which are provided as a link with the link relation canonical in the batch job result metadata.

    • +
    +

  • +
  • spatial_extent (Union[Dict[str, float], Parameter, None]) –

    Limits the data to load to the specified bounding box or polygons.

    +

    For raster data, the process loads the pixel into the data cube if the point at the pixel center intersects +with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).

    +

    For vector data, the process loads the geometry into the data cube if the geometry is fully within the +bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). +Empty geometries may only be in the data cube if no spatial extent has been provided.

    +

    The GeoJSON can be one of the following feature types:

    +
      +
    • A Polygon or MultiPolygon geometry,

    • +
    • a Feature with a Polygon or MultiPolygon geometry, or

    • +
    • a FeatureCollection containing at least one Feature with Polygon or MultiPolygon geometries.

    • +
    +

    Set this parameter to None to set no limit for the spatial extent. +Be careful with this when loading large datasets. It is recommended to use this parameter instead of +using filter_bbox() or filter_spatial() directly after loading unbounded data.

    +

  • +
  • temporal_extent (Union[Sequence[Union[str, date, Parameter, PGNode, ProcessBuilderBase, None]], Parameter, str, None]) –

    Limits the data to load to the specified left-closed temporal interval. +Applies to all temporal dimensions. +The interval has to be specified as an array with exactly two elements:

    +
      +
    1. The first element is the start of the temporal interval. +The specified instance in time is included in the interval.

    2. +
    3. The second element is the end of the temporal interval. +The specified instance in time is excluded from the interval.

    4. +
    +

    The second element must always be greater/later than the first element. +Otherwise, a TemporalExtentEmpty exception is thrown.

    +

    Also supports open intervals by setting one of the boundaries to None, but never both.

    +

    Set this parameter to None to set no limit for the temporal extent. +Be careful with this when loading large datasets. It is recommended to use this parameter instead of +using filter_temporal() directly after loading unbounded data.

    +

  • +
  • bands (Optional[List[str]]) –

    Only adds the specified bands into the data cube so that bands that don’t match the list +of band names are not available. Applies to all dimensions of type bands.

    +

    Either the unique band name (metadata field name in bands) or one of the common band names +(metadata field common_name in bands) can be specified. +If the unique band name and the common name conflict, the unique band name has a higher priority.

    +

    The order of the specified array defines the order of the bands in the data cube. +If multiple bands match a common name, all matched bands are included in the original order.

    +

    It is recommended to use this parameter instead of using filter_bands() directly after loading unbounded data.

    +

  • +
  • properties (Optional[Dict[str, Union[str, PGNode, Callable]]]) –

    Limits the data by metadata properties to include only data in the data cube which +all given conditions return True for (AND operation).

    +

    Specify key-value-pairs with the key being the name of the metadata property, +which can be retrieved with the openEO Data Discovery for Collections. +The value must be a condition (user-defined process) to be evaluated against a STAC API. +This parameter is not supported for static STAC.

    +

  • +
+
+
Return type:
+

DataCube

+
+
+
+

Added in version 0.17.0.

+
+
+

Changed in version 0.23.0: Argument temporal_extent: add support for year/month shorthand notation +as discussed at Year/month shorthand notation.

+
+
+

See also

+

openeo.org documentation on process “load_stac”.

+
+
+ +
+
+load_stac_from_job(job, spatial_extent=None, temporal_extent=None, bands=None, properties=None)[source]
+

Convenience function to directly load the results of a finished openEO job +(as a STAC collection) with load_stac() in a new openEO process graph.

+

When available, the “canonical” link (signed URL) of the job results will be used.

+
+
Parameters:
+
    +
  • job (Union[BatchJob, str]) – a BatchJob or job id pointing to a finished job. +Note that the BatchJob approach allows to point +to a batch job on a different back-end.

  • +
  • spatial_extent (Union[Dict[str, float], Parameter, None]) – limit data to specified bounding box or polygons

  • +
  • temporal_extent (Union[Sequence[Union[str, date, Parameter, PGNode, ProcessBuilderBase, None]], Parameter, str, None]) – limit data to specified temporal interval.

  • +
  • bands (Optional[List[str]]) – limit data to the specified bands

  • +
+
+
Return type:
+

DataCube

+
+
+
+

Added in version 0.30.0.

+
+
+ +
+
+load_url(url, format, options=None)[source]
+

Loads a file from a URL

+
+
Parameters:
+
    +
  • url (str) – The URL to read from. Authentication details such as API keys or tokens may need to be included in the URL.

  • +
  • format (str) – The file format to use when loading the data.

  • +
  • options (Optional[dict]) – The file format parameters to use when reading the data. +Must correspond to the parameters that the server reports as supported parameters for the chosen format

  • +
+
+
Returns:
+

new VectorCube instance

+
+
+
+

Warning

+

EXPERIMENTAL: this process is experimental with the potential for major things to change.

+
+
+

Added in version 0.22.0.

+
+
+

See also

+

openeo.org documentation on process “load_url”.

+
+
+ +
+
+remove_service(service_id)[source]
+

Stop and remove a secondary web service.

+
+
Parameters:
+

service_id (str) – service identifier

+
+
Returns:
+

+
+
+
+

Deprecated since version 0.8.0: Use openeo.rest.service.Service.delete_service() +instead.

+
+
+ +
+
+request(method, path, headers=None, auth=None, check_error=True, expected_status=None, **kwargs)[source]
+

Generic request send

+
+ +
+
+save_user_defined_process(user_defined_process_id, process_graph, parameters=None, public=False, summary=None, description=None, returns=None, categories=None, examples=None, links=None)[source]
+

Store a process graph and its metadata on the backend as a user-defined process for the authenticated user.

+
+
Parameters:
+
    +
  • user_defined_process_id (str) – unique identifier for the user-defined process

  • +
  • process_graph (Union[dict, ProcessBuilderBase]) – a process graph

  • +
  • parameters (List[Union[dict, Parameter]]) – a list of parameters

  • +
  • public (bool) – visible to other users?

  • +
  • summary (Optional[str]) – A short summary of what the process does.

  • +
  • description (Optional[str]) – Detailed description to explain the entity. CommonMark 0.29 syntax MAY be used for rich text representation.

  • +
  • returns (Optional[dict]) – Description and schema of the return value.

  • +
  • categories (Optional[List[str]]) – A list of categories.

  • +
  • examples (Optional[List[dict]]) – A list of examples.

  • +
  • links (Optional[List[dict]]) – A list of links.

  • +
+
+
Return type:
+

RESTUserDefinedProcess

+
+
Returns:
+

a RESTUserDefinedProcess instance

+
+
+
+ +
+
+service(service_id)[source]
+

Get the secondary web service based on the id. The service with the given id should already exist.

+

Use openeo.rest.connection.Connection.create_service() to create new services

+
+
Parameters:
+

service_id – the service id of an existing secondary web service

+
+
Return type:
+

Service

+
+
Returns:
+

A service object.

+
+
+
+ +
+
+upload_file(source, target=None)[source]
+

Uploads a file to the given target location in the user workspace on the back-end.

+

If a file at the target path exists in the user workspace it will be replaced.

+
+
Parameters:
+
    +
  • source (Union[Path, str]) – A path to a file on the local file system to upload.

  • +
  • target (Union[str, PurePosixPath, None]) – The desired path (which can contain a folder structure if desired) on the user workspace. +If not set: defaults to the original filename (without any folder structure) of the local file.

  • +
+
+
Return type:
+

UserFile

+
+
+
+ +
+
+user_defined_process(user_defined_process_id)[source]
+

Get the user-defined process based on its id. The process with the given id should already exist.

+
+
Parameters:
+

user_defined_process_id (str) – the id of the user-defined process

+
+
Return type:
+

RESTUserDefinedProcess

+
+
Returns:
+

a RESTUserDefinedProcess instance

+
+
+
+ +
+
+user_jobs()[source]
+
+
Return type:
+

List[dict]

+
+
+
+

Deprecated since version 0.4.10: use list_jobs() instead

+
+
+ +
+
+validate_process_graph(process_graph)[source]
+

Validate a process graph without executing it.

+
+
Parameters:
+

process_graph (Union[dict, FlatGraphableMixin, str, Path, List[FlatGraphableMixin]]) – openEO-style (flat) process graph representation, +or an object that can be converted to such a representation: +a dictionary, a DataCube object, +a string with a JSON representation, +a local file path or URL to a JSON representation, +a MultiResult object, …

+
+
Return type:
+

List[dict]

+
+
Returns:
+

list of errors (dictionaries with “code” and “message” fields)

+
+
+
+ +
+
+vectorcube_from_paths(paths, format, options={})[source]
+

Loads one or more files referenced by url or path that is accessible by the backend.

+
+
Parameters:
+
    +
  • paths (List[str]) – The files to read.

  • +
  • format (str) – The file format to read from. It must be one of the values that the server reports as supported input file formats.

  • +
  • options (dict) – The file format parameters to be used to read the files. Must correspond to the parameters that the server reports as supported parameters for the chosen format.

  • +
+
+
Return type:
+

VectorCube

+
+
Returns:
+

A VectorCube.

+
+
+
+

Added in version 0.14.0.

+
+
+ +
+
+classmethod version_discovery(url, session=None, timeout=None)[source]
+

Do automatic openEO API version discovery from given url, using a “well-known URI” strategy.

+
+
Parameters:
+

url (str) – initial backend url (not including “/.well-known/openeo”)

+
+
Return type:
+

str

+
+
Returns:
+

root url of highest supported backend version

+
+
+
+ +
+
+version_info()[source]
+

List version of the openEO client, API, back-end, etc.

+
+ +
+ +
+
+

openeo.rest.job

+
+
+class openeo.rest.job.BatchJob(job_id, connection)[source]
+

Handle for an openEO batch job, allowing you to describe, start and cancel it, inspect its results, etc.

+
+

Added in version 0.11.0: This class originally had the more cryptic name RESTJob, +which is still available as legacy alias, +but BatchJob is recommended since version 0.11.0.

+
+
+
+delete()[source]
+

Delete this batch job.

+
+

Added in version 0.20.0: This method was previously called delete_job().

+
+

This method uses openEO endpoint DELETE /jobs/{job_id}

+
+ +
+
+delete_job()
+

Delete this batch job.

+
+

Deprecated since version 0.20.0: Usage of this legacy method is deprecated. Use delete() instead.

+
+
+ +
+
+describe()[source]
+

Get detailed metadata about a submitted batch job +(title, process graph, status, progress, …). +Return type: dict

+
+

Added in version 0.20.0: This method was previously called describe_job().

+
+

This method uses openEO endpoint GET /jobs/{job_id}

+
+ +
+
+describe_job()
+

Get detailed metadata about a submitted batch job +(title, process graph, status, progress, …). +Return type: dict

+
+

Deprecated since version 0.20.0: Usage of this legacy method is deprecated. Use describe() instead.

+
+
+ +
+
+download_result(target=None)[source]
+

Download single job result to the target file path or into folder (current working dir by default).

+

Fails if there are multiple result files.

+
+
Parameters:
+

target (Union[str, Path]) – String or path where the file should be downloaded to.

+
+
Return type:
+

Path

+
+
+
+ +
+
+download_results(target=None)[source]
+

Download all job result files into given folder (current working dir by default).

+

The names of the files are taken directly from the backend.

+
+
Parameters:
+

target (Union[str, Path]) – String/path, folder where to put the result files.

+
+
Return type:
+

Dict[Path, dict]

+
+
Returns:
+

file_list: Dict containing the downloaded file path as key and the asset metadata as value

+
+
+
+

Deprecated since version 0.4.10: Instead use BatchJob.get_results() and the more +flexible download functionality of JobResults

+
+
+ +
+
+estimate()[source]
+

Calculate time/cost estimate for a job.

+

This method uses openEO endpoint GET /jobs/{job_id}/estimate

+
+ +
+
+estimate_job()
+

Calculate time/cost estimate for a job.

+
+

Deprecated since version 0.20.0: Usage of this legacy method is deprecated. Use estimate() instead.

+
+
+ +
+
+get_result()[source]
+
+

Deprecated since version 0.4.10: Use BatchJob.get_results() instead.

+
+
+ +
+
+get_results()[source]
+

Get handle to batch job results for result metadata inspection or downloading resulting assets. +Return type: JobResults

+
+

Added in version 0.4.10.

+
+
+ +
+
+get_results_metadata_url(*, full=False)[source]
+

Get results metadata URL

+
+
Return type:
+

str

+
+
+
+ +
+
+job_id
+

Unique identifier of the batch job (string).

+
+ +
+
+list_results()[source]
+

Get batch job results metadata. +Return type: dict

+
+

Deprecated since version 0.4.10: Use get_results() instead.

+
+
+ +
+
+logs(offset=None, level=None)[source]
+

Retrieve job logs.

+
+
Parameters:
+
    +
  • offset (Optional[str]) –

    The last identifier (property id of a LogEntry) the client has received.

    +

    If provided, the back-end only sends the entries that occurred after the specified identifier. +If not provided or empty, start with the first entry.

    +

    Defaults to None.

    +

  • +
  • level (Union[int, str, None]) –

    Minimum log level to retrieve.

    +

    You can use either constants from Python’s standard module logging +or their names (case-insensitive).

    +
    +
    For example:

    logging.INFO, "info" or "INFO" can all be used to show the messages +for level logging.INFO and above, i.e. also logging.WARNING and +logging.ERROR will be included.

    +
    +
    +

    Default is to show all log levels, in other words logging.DEBUG. +This is also the result when you explicitly pass level=None or level="".

    +

  • +
+
+
Return type:
+

List[LogEntry]

+
+
Returns:
+

A list containing the log entries for the batch job.

+
+
+
+ +
+
+run_synchronous(outputfile=None, print=<built-in function print>, max_poll_interval=60, connection_retry_interval=30)[source]
+

Start the job, wait for it to finish and download result

+
+
Return type:
+

BatchJob

+
+
+
+ +
+
+start()[source]
+

Start this batch job.

+
+
Return type:
+

BatchJob

+
+
Returns:
+

Started batch job

+
+
+
+

Added in version 0.20.0: This method was previously called start_job().

+
+

This method uses openEO endpoint POST /jobs/{job_id}/results

+
+ +
+
+start_and_wait(print=<built-in function print>, max_poll_interval=60, connection_retry_interval=30, soft_error_max=10)[source]
+

Start the batch job, poll its status and wait till it finishes (or fails)

+
+
Parameters:
+
    +
  • print – print/logging function to show progress/status

  • +
  • max_poll_interval (int) – maximum number of seconds to sleep between status polls

  • +
  • connection_retry_interval (int) – how long to wait when status poll failed due to connection issue

  • +
  • soft_error_max – maximum number of soft errors (e.g. temporary connection glitches) to allow

  • +
+
+
Return type:
+

BatchJob

+
+
Returns:
+

+
+
+
+ +
+
+start_job()
+

Start this batch job. +Return type: BatchJob

+
+

Deprecated since version 0.20.0: Usage of this legacy method is deprecated. Use start() instead.

+
+
+ +
+
+status()[source]
+

Get the status of the batch job

+
+
Return type:
+

str

+
+
Returns:
+

batch job status, one of “created”, “queued”, “running”, “canceled”, “finished” or “error”.

+
+
+
+ +
+
+stop()[source]
+

Stop this batch job.

+
+

Added in version 0.20.0: This method was previously called stop_job().

+
+

This method uses openEO endpoint DELETE /jobs/{job_id}/results

+
+ +
+
+stop_job()
+

Stop this batch job.

+
+

Deprecated since version 0.20.0: Usage of this legacy method is deprecated. Use stop() instead.

+
+
+ +
+ +
+
+class openeo.rest.job.JobResults(job)[source]
+

Results of a batch job: listing of one or more output files (assets) +and some metadata.

+
+

Added in version 0.4.10.

+
+
+
+download_file(target=None, name=None)[source]
+

Download single asset. Can be used when there is only one asset in the +JobResults, or when the desired asset name is given explicitly.

+
+
Parameters:
+
    +
  • target (Union[Path, str]) – path to download to. Can be an existing directory +(in which case the filename advertised by backend will be used) +or full file name. By default, the working directory will be used.

  • +
  • name (str) – asset name to download (not required when there is only one asset)

  • +
+
+
Return type:
+

Path

+
+
Returns:
+

path of downloaded asset

+
+
+
+ +
+
+download_files(target=None, include_stac_metadata=True)[source]
+

Download all assets to given folder.

+
+
Parameters:
+
    +
  • target (Union[Path, str]) – path to folder to download to (must be a folder if it already exists)

  • +
  • include_stac_metadata (bool) – whether to download the job result metadata as a STAC (JSON) file.

  • +
+
+
Return type:
+

List[Path]

+
+
Returns:
+

list of paths to the downloaded assets.

+
+
+
+ +
+
+get_asset(name=None)[source]
+

Get single asset by name or without name if there is only one.

+
+
Return type:
+

ResultAsset

+
+
+
+ +
+
+get_assets()[source]
+

Get all assets from the job results.

+
+
Return type:
+

List[ResultAsset]

+
+
+
+ +
+
+get_metadata(force=False)[source]
+

Get batch job results metadata (parsed JSON)

+
+
Return type:
+

dict

+
+
+
+ +
+ +
+
+class openeo.rest.job.RESTJob(job_id, connection)[source]
+

Legacy alias for BatchJob.

+
+

Deprecated since version 0.11.0: Use BatchJob instead

+
+
+ +
+
+class openeo.rest.job.ResultAsset(job, name, href, metadata)[source]
+

Result asset of a batch job (e.g. a GeoTIFF or JSON file)

+
+

Added in version 0.4.10.

+
+
+
+download(target=None, *, chunk_size=10000000)[source]
+

Download asset to given location

+
+
Parameters:
+
    +
  • target (Union[str, Path, None]) – download target path. Can be an existing folder +(in which case the filename advertised by backend will be used) +or full file name. By default, the working directory will be used.

  • +
  • chunk_size (int) – chunk size for streaming response.

  • +
+
+
Return type:
+

Path

+
+
+
+ +
+
+href
+

Download URL of the asset.

+
+ +
+
+load_bytes()[source]
+

Load asset in memory as raw bytes.

+
+
Return type:
+

bytes

+
+
+
+ +
+
+load_json()[source]
+

Load asset in memory and parse as JSON.

+
+
Return type:
+

dict

+
+
+
+ +
+
+metadata
+

Asset metadata provided by the backend, possibly containing keys “type” (for media type), “roles”, “title”, “description”.

+
+ +
+
+name
+

Asset name as advertised by the backend.

+
+ +
+ +
+
+

openeo.rest.conversions

+

Helpers for data conversions between Python ecosystem data types and openEO data structures.

+
+
+exception openeo.rest.conversions.InvalidTimeSeriesException[source]
+
+ +
+
+openeo.rest.conversions.datacube_from_file(filename, fmt='netcdf')[source]
+
+
Return type:
+

XarrayDataCube

+
+
+
+

Deprecated since version 0.7.0: Use XarrayDataCube.from_file() instead.

+
+
+ +
+
+openeo.rest.conversions.datacube_plot(datacube, *args, **kwargs)[source]
+
+

Deprecated since version 0.7.0: Use XarrayDataCube.plot() instead.

+
+
+ +
+
+openeo.rest.conversions.datacube_to_file(datacube, filename, fmt='netcdf')[source]
+
+

Deprecated since version 0.7.0: Use XarrayDataCube.save_to_file() instead.

+
+
+ +
+
+openeo.rest.conversions.timeseries_json_to_pandas(timeseries, index='date', auto_collapse=True)[source]
+

Convert a timeseries JSON object as returned by the aggregate_spatial process to a pandas DataFrame object

+

This timeseries data has three dimensions in general: date, polygon index and band index. +One of these will be used as index of the resulting dataframe (as specified by the index argument), +and the other two will be used as multilevel columns. +When there is just a single polygon or band in play, the dataframe will be simplified +by removing the corresponding dimension if auto_collapse is enabled (on by default).

+
+
Parameters:
+
    +
  • timeseries (dict) – dictionary as returned by aggregate_spatial

  • +
  • index (str) – which dimension should be used for the DataFrame index: ‘date’ or ‘polygon’

  • +
  • auto_collapse – whether single band or single polygon cases should be simplified automatically

  • +
+
+
Return type:
+

DataFrame

+
+
Returns:
+

pandas DataFrame or Series

+
+
+
+ +
+
+

openeo.rest.udp

+
+
+class openeo.rest.udp.RESTUserDefinedProcess(user_defined_process_id, connection)[source]
+

Wrapper for a user-defined process stored (or to be stored) on an openEO back-end

+
+
+delete()[source]
+

Remove user-defined process from back-end

+
+
Return type:
+

None

+
+
+
+ +
+
+describe()[source]
+

Get metadata of this user-defined process.

+
+
Return type:
+

dict

+
+
+
+ +
+
+store(process_graph, parameters=None, public=False, summary=None, description=None, returns=None, categories=None, examples=None, links=None)[source]
+

Store a process graph and its metadata on the backend as a user-defined process

+
+ +
+
+update(process_graph, parameters=None, public=False, summary=None, description=None)[source]
+
+

Deprecated since version 0.4.11: Use store instead. Method update is misleading: OpenEO API +does not provide (partial) updates of user-defined processes, +only fully overwriting ‘store’ operations.

+
+
+ +
+ +
+
+openeo.rest.udp.build_process_dict(process_graph, process_id=None, summary=None, description=None, parameters=None, returns=None, categories=None, examples=None, links=None)[source]
+

Build a dictionary describing a process with metadata (process_graph, parameters, description, …)

+
+
Parameters:
+
    +
  • process_graph (Union[dict, FlatGraphableMixin, Path, List[FlatGraphableMixin]]) – dict or builder representing a process graph

  • +
  • process_id (Optional[str]) – identifier of the process

  • +
  • summary (Optional[str]) – short summary of what the process does

  • +
  • description (Optional[str]) – detailed description

  • +
  • parameters (Optional[List[Union[dict, Parameter]]]) – list of process parameters (which have name, schema, default value, …)

  • +
  • returns (Optional[dict]) – description and schema of what the process returns

  • +
  • categories (Optional[List[str]]) – list of categories

  • +
  • examples (Optional[List[dict]]) – list of examples, may be used for unit tests

  • +
  • links (Optional[List[dict]]) – list of links related to the process

  • +
+
+
Return type:
+

dict

+
+
Returns:
+

dictionary in openEO “process graph with metadata” format

+
+
+
+ +
+
+

openeo.rest.userfile

+
+
+class openeo.rest.userfile.UserFile(path, *, connection, metadata=None)[source]
+

Handle to a (user-uploaded) file in the user workspace on an openEO back-end.

+
+
+delete()[source]
+

Delete the user-uploaded file from the user workspace on the back-end.

+
+ +
+
+download(target=None)[source]
+

Downloads a user-uploaded file from the user workspace on the back-end +locally to the given location.

+
+
Parameters:
+

target (Union[Path, str]) – local download target path. Can be an existing folder +(in which case the file name advertised by backend will be used) +or full file name. By default, the working directory will be used.

+
+
Return type:
+

Path

+
+
+
+ +
+
+classmethod from_metadata(metadata, connection)[source]
+

Build UserFile from a workspace file metadata dictionary.

+
+
Return type:
+

UserFile

+
+
+
+ +
+
+to_dict()[source]
+

Returns the provided metadata as dict.

+
+
Return type:
+

Dict[str, Any]

+
+
+
+ +
+
+upload(source)[source]
+

Uploads a local file to the path corresponding to this UserFile in the user workspace +and returns new UserFile of newly uploaded file.

+
+
+

Tip

+

Usually you’ll just need +Connection.upload_file() +instead of this UserFile method.

+
+
+

If the file exists in the user workspace it will be replaced.

+
+
Parameters:
+

source (Union[Path, str]) – A path to a file on the local file system to upload.

+
+
Return type:
+

UserFile

+
+
Returns:
+

new UserFile instance of the newly uploaded file

+
+
+
+ +
+ +
+
+

openeo.udf

+
+
+class openeo.udf.udf_data.UdfData(proj=None, datacube_list=None, feature_collection_list=None, structured_data_list=None, user_context=None)[source]
+

Container for data passed to a user defined function (UDF)

+
+
+property datacube_list: List[XarrayDataCube] | None
+

Get the data cube list

+
+ +
+
+property feature_collection_list: List[FeatureCollection] | None
+

get all feature collections as list

+
+ +
+
+classmethod from_dict(udf_dict)[source]
+

Create a udf data object from a python dictionary that was created from +the JSON definition of the UdfData class

+
+
Parameters:
+

udf_dict (dict) – The dictionary that contains the udf data definition

+
+
Return type:
+

UdfData

+
+
+
+ +
+
+get_datacube_list()[source]
+

Get the data cube list

+
+
Return type:
+

Optional[List[XarrayDataCube]]

+
+
+
+ +
+
+get_feature_collection_list()[source]
+

get all feature collections as list

+
+
Return type:
+

Optional[List[FeatureCollection]]

+
+
+
+ +
+
+get_structured_data_list()[source]
+

Get all structured data entries

+
+
Return type:
+

Optional[List[StructuredData]]

+
+
Returns:
+

A list of StructuredData objects

+
+
+
+ +
+
+set_datacube_list(datacube_list)[source]
+

Set the data cube list

+
+
Parameters:
+

datacube_list (Optional[List[XarrayDataCube]]) – A list of data cubes

+
+
+
+ +
+
+set_structured_data_list(structured_data_list)[source]
+

Set the list of structured data

+
+
Parameters:
+

structured_data_list (Optional[List[StructuredData]]) – A list of StructuredData objects

+
+
+
+ +
+
+property structured_data_list: List[StructuredData] | None
+

Get all structured data entries

+
+
Returns:
+

A list of StructuredData objects

+
+
+
+ +
+
+to_dict()[source]
+

Convert this UdfData object into a dictionary that can be converted into +a valid JSON representation

+
+
Return type:
+

dict

+
+
+
+ +
+
+property user_context: dict
+

Return the user context that was passed to the run_udf function

+
+ +
+ +
+
+class openeo.udf.xarraydatacube.XarrayDataCube(array)[source]
+

This is a thin wrapper around xarray.DataArray +providing a basic “DataCube” interface for openEO UDF usage around multi-dimensional data.

+
+
+property array: DataArray
+

Get the xarray.DataArray that contains the data and dimension definition

+
+ +
+
+classmethod from_dict(xdc_dict)[source]
+

Create a XarrayDataCube from a Python dictionary that was created from +the JSON definition of the data cube

+
+
Parameters:
+

xdc_dict – The dictionary that contains the data cube definition

+
+
Return type:
+

XarrayDataCube

+
+
+
+ +
+
+classmethod from_file(path, fmt=None, **kwargs)[source]
+

Load data file as XarrayDataCube in memory

+
+
Parameters:
+
    +
  • path (Union[str, Path]) – the file on disk

  • +
  • fmt – format to load from, e.g. “netcdf” or “json” +(will be auto-detected when not specified)

  • +
+
+
Return type:
+

XarrayDataCube

+
+
Returns:
+

loaded data cube

+
+
+
+ +
+
+get_array()[source]
+

Get the xarray.DataArray that contains the data and dimension definition

+
+
Return type:
+

DataArray

+
+
+
+ +
+
+plot(title=None, limits=None, show_bandnames=True, show_dates=True, show_axeslabels=False, fontsize=10.0, oversample=1, cmap='RdYlBu_r', cbartext=None, to_file=None, to_show=True)[source]
+

Visualize a XarrayDataCube with matplotlib

+
+
Parameters:
+
    +
  • datacube – data to plot

  • +
  • title (str) – title text drawn in the top left corner (default: nothing)

  • +
  • limits – range of the contour plot as a tuple(min,max) (default: None, in which case the min/max is computed from the data)

  • +
  • show_bandnames (bool) – whether to plot the column names (default: True)

  • +
  • show_dates (bool) – whether to show the dates for each row (default: True)

  • +
  • show_axeslabels (bool) – whether to show the labels on the axes (default: False)

  • +
  • fontsize (float) – font size in pixels (default: 10)

  • +
  • oversample (float) – one value is plotted into oversample x oversample number of pixels (default: 1 which means each value is plotted as a single pixel)

  • +
  • cmap (Union[str, ‘matplotlib.colors.Colormap’]) – built-in matplotlib color map name or ColorMap object (default: RdYlBu_r which is a blue-yellow-red rainbow)

  • +
  • cbartext (str) – text on top of the legend (default: nothing)

  • +
  • to_file (str) – filename to save the image to (default: None, which means no file is generated)

  • +
  • to_show (bool) – whether to show the image in a matplotlib window (default: True)

  • +
+
+
Returns:
+

None

+
+
+
+ +
+
+save_to_file(path, fmt=None, **kwargs)[source]
+

Store XarrayDataCube to file

+
+
Parameters:
+
    +
  • path (Union[str, Path]) – destination file on disk

  • +
  • fmt – format to save as, e.g. “netcdf” or “json” +(will be auto-detected when not specified)

  • +
+
+
+
+ +
+
+to_dict()[source]
+

Convert this hypercube into a dictionary that can be converted into +a valid JSON representation

+
+
Return type:
+

dict

+
+
+
>>> example = {
+...     "id": "test_data",
+...     "data": [
+...         [[0.0, 0.1], [0.2, 0.3]],
+...         [[0.0, 0.1], [0.2, 0.3]],
+...     ],
+...     "dimension": [
+...         {"name": "time", "coordinates": ["2001-01-01", "2001-01-02"]},
+...         {"name": "X", "coordinates": [50.0, 60.0]},
+...         {"name": "Y"},
+...     ],
+... }
+
+
+
+ +
+ +
+
+class openeo.udf.structured_data.StructuredData(data, description=None, type=None)[source]
+

This class represents structured data that is produced by a UDF and can not be represented +as a raster or vector data cube. For example: the result of a statistical +computation.

+

Usage example:

+
>>> StructuredData([3, 5, 8, 13])
+>>> StructuredData({"mean": 5, "median": 8})
+>>> StructuredData([('col_1', 'col_2'), (1, 2), (2, 3)], type="table")
+
+
+
+ +

Note: this module was initially developed under the openeo-udf project (https://github.com/Open-EO/openeo-udf)

+
+
+openeo.udf.run_code.execute_local_udf(udf, datacube, fmt='netcdf')[source]
+

Locally executes a user-defined function on a previously downloaded datacube.

+
+
Parameters:
+
    +
  • udf (Union[str, UDF]) – the code of the user defined function

  • +
  • datacube (Union[str, DataArray, XarrayDataCube]) – the path to the downloaded data on disk or a DataCube

  • +
  • fmt – format of the file if datacube is string

  • +
+
+
Returns:
+

the resulting DataCube

+
+
+
+ +
+
+openeo.udf.run_code.extract_udf_dependencies(udf)[source]
+

Extract dependencies from UDF code declared in a top-level comment block +following the inline script metadata specification (PEP 723).

+

Basic example UDF snippet declaring expected dependencies as embedded metadata +in a comment block:

+
# /// script
+# dependencies = [
+#     "geojson",
+# ]
+# ///
+
+import geojson
+
+def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray:
+    ...
+
+
+
+

See also

+

Standard for declaring Python UDF dependencies for more in-depth information.

+
+
+
Parameters:
+

udf (Union[str, UDF]) – UDF code as a string or UDF object

+
+
Return type:
+

Optional[List[str]]

+
+
Returns:
+

List of extracted dependencies or None when no valid metadata block with dependencies was found.

+
+
+
+

Added in version 0.30.0.

+
+
+ +

Debug utilities for UDFs

+
+
+openeo.udf.debug.inspect(data=None, message='', code='User', level='info')[source]
+

Implementation of the openEO inspect process for UDF contexts.

+

Note that it is up to the back-end implementation to properly capture this logging +and include it in the batch job logs.

+
+
Parameters:
+
    +
  • data – data to log

  • +
  • message (str) – message to send in addition to the data

  • +
  • code (str) – A label to help identify one or more log entries

  • +
  • level (str) – The severity level of this message. Allowed values: “error”, “warning”, “info”, “debug”

  • +
+
+
+
+

Added in version 0.10.1.

+
+
+

See also

+

Logging from a UDF

+
+
+ +
+
+

openeo.util

+

Various utilities and helpers.

+
+
+class openeo.util.BBoxDict(*, west, south, east, north, crs=None)[source]
+

Dictionary based helper to easily create/work with bounding box dictionaries +(having keys “west”, “south”, “east”, “north”, and optionally “crs”).

+
+
Parameters:
+

crs (Union[int, str, None]) – value describing the coordinate reference system. +Typically just an int (interpreted as EPSG code, e.g. 4326) +or a string (handled as authority string, e.g. "EPSG:4326"). +See openeo.util.normalize_crs() for more details about additional normalization that is applied to this argument.

+
+
+
+

Added in version 0.10.1.

+
+
+
+classmethod from_dict(data)[source]
+

Build from dictionary with at least keys “west”, “south”, “east”, and “north”.

+
+
Return type:
+

BBoxDict

+
+
+
+ +
+
+classmethod from_sequence(seq, crs=None)[source]
+

Build from sequence of 4 bounds (west, south, east and north).

+
+
Return type:
+

BBoxDict

+
+
+
+ +
+ +
+
+openeo.util.load_json_resource(src)[source]
+

Helper to load some kind of JSON resource

+
+
Parameters:
+

src (Union[str, Path]) – a JSON resource: a raw JSON string, +a path to (local) JSON file, or a URL to a remote JSON resource

+
+
Return type:
+

dict

+
+
Returns:
+

data structure parsed from JSON

+
+
+
+ +
+
+openeo.util.normalize_crs(crs, *, use_pyproj=True)[source]
+

Normalize the given value (describing a CRS or Coordinate Reference System) +to an openEO compatible EPSG code (int) or WKT2 CRS string.

+

At minimum, the following input values are handled:

+
    +
  • an integer value (e.g. 4326) is interpreted as an EPSG code

  • +
  • a string that just contains an integer (e.g. "4326") +or with an additional "EPSG:" prefix (e.g. "EPSG:4326") +will also be interpreted as an EPSG value

  • +
+

Additional support and behavior depends on the availability of the pyproj library:

+
    +
  • When available, it will be used for parsing and validation: +everything supported by pyproj.CRS.from_user_input is allowed. +See the pyproj docs for more details.

  • +
  • Otherwise, some best effort validation is done: +EPSG looking integer or string values will be parsed as such as discussed above. +Other strings will be assumed to be WKT2 already. +Other data structures will not be accepted.

  • +
+
+
Parameters:
+
    +
  • crs (Any) – value that encodes a coordinate reference system, typically just an int (EPSG code) or string (authority string). +If the pyproj library is available, everything supported by it is allowed.

  • +
  • use_pyproj (bool) – whether pyproj should be leveraged at all +(mainly useful for testing the “no pyproj available” code path)

  • +
+
+
Return type:
+

Union[None, int, str]

+
+
Returns:
+

EPSG code as int, or WKT2 string. Or None if input was empty.

+
+
Raises:
+

ValueError – When the given CRS data can not be parsed/converted/normalized.

+
+
+
+ +
+
+openeo.util.to_bbox_dict(x, *, crs=None)[source]
+

Convert given data or object to a bounding box dictionary +(having keys “west”, “south”, “east”, “north”, and optionally “crs”).

+

Supports various input types/formats:

+
    +
  • list/tuple (assumed to be in west-south-east-north order)

    +
    >>> to_bbox_dict([3, 50, 4, 51])
    +{'west': 3, 'south': 50, 'east': 4, 'north': 51}
    +
    +
    +
  • +
  • dictionary (unnecessary items will be stripped)

    +
    >>> to_bbox_dict({
    +...     "color": "red", "shape": "triangle",
    +...     "west": 1, "south": 2, "east": 3, "north": 4, "crs": "EPSG:4326",
    +... })
    +{'west': 1, 'south': 2, 'east': 3, 'north': 4, 'crs': 'EPSG:4326'}
    +
    +
    +
  • +
  • a shapely geometry

  • +
+
+

Added in version 0.10.1.

+
+
+
Parameters:
+
    +
  • x (Any) – input data that describes west-south-east-north bounds in some way, e.g. as a dictionary, +a list, a tuple, a shapely geometry, …

  • +
  • crs (Union[int, str, None]) – (optional) CRS field

  • +
+
+
Return type:
+

BBoxDict

+
+
Returns:
+

dictionary (subclass) with keys “west”, “south”, “east”, “north”, and optionally “crs”.

+
+
+
+ +
+
+

openeo.processes

+
+
+openeo.processes.process(process_id, arguments=None, namespace=None, **kwargs)
+

Apply process, using given arguments

+
+
Parameters:
+
    +
  • process_id (str) – process id of the process.

  • +
  • arguments (dict) – argument dictionary for the process.

  • +
  • namespace (Optional[str]) – process namespace (only necessary to specify for non-predefined or non-user-defined processes)

  • +
+
+
Returns:
+

new ProcessBuilder instance

+
+
+
+ +
+
+

Graph building

+

Various utilities and helpers to simplify the construction of openEO process graphs.

+
+

Public openEO process graph building utilities

+
+
+
+class openeo.rest.graph_building.CollectionProperty(name, _builder=None)[source]
+

Helper object to easily create simple collection metadata property filters +to be used with Connection.load_collection().

+
+

Note

+

This class should not be used directly by end user code. +Use the collection_property() factory instead.

+
+
+

Warning

+

this is an experimental feature, naming might change.

+
+
+ +
+
+openeo.rest.graph_building.collection_property(name)[source]
+

Helper to easily create simple collection metadata property filters +to be used with Connection.load_collection().

+

Usage example:

+
from openeo import collection_property
+...
+
+connection.load_collection(
+    ...
+    properties=[
+        collection_property("eo:cloud_cover") <= 75,
+        collection_property("platform") == "Sentinel-2B",
+    ]
+)
+
+
+
+

Warning

+

this is an experimental feature, naming might change.

+
+
+

Added in version 0.26.0.

+
+
+
Parameters:
+

name (str) – name of the collection property to filter on

+
+
Return type:
+

CollectionProperty

+
+
Returns:
+

an object that supports operators like <=, == to easily build simple property filters.

+
+
+
+ +
+

Internal openEO process graph building utilities

+

Internal functionality for abstracting, building, manipulating and processing openEO process graphs.

+
+
+
+class openeo.internal.graph_building.FlatGraphableMixin[source]
+

Mixin for classes that can be exported/converted to +a “flat graph” representation of an openEO process graph.

+
+
+print_json(*, file=None, indent=2, separators=None, end='\\n')[source]
+

Print interoperable JSON representation of the process graph.

+

See DataCube.to_json() to get the JSON representation as a string +and Export a process graph for more usage information.

+

Also see json.dumps docs for more information on the JSON formatting options.

+
+
Parameters:
+
    +
  • file – file-like object (stream) to print to (current sys.stdout by default). +Or a path (string or pathlib.Path) to a file to write to.

  • +
  • indent (Optional[int]) – JSON indentation level.

  • +
  • separators (Optional[Tuple[str, str]]) – (optional) tuple of item/key separators.

  • +
  • end (str) – additional string to be printed at the end (newline by default).

  • +
+
+
+
+

Added in version 0.12.0.

+
+
+

Added in version 0.23.0: added the end argument.

+
+
+ +
+
+to_json(*, indent=2, separators=None)[source]
+

Get interoperable JSON representation of the process graph.

+

See DataCube.print_json() to directly print the JSON representation +and Export a process graph for more usage information.

+

Also see json.dumps docs for more information on the JSON formatting options.

+
+
Parameters:
+
    +
  • indent (Optional[int]) – JSON indentation level.

  • +
  • separators (Optional[Tuple[str, str]]) – (optional) tuple of item/key separators.

  • +
+
+
Return type:
+

str

+
+
Returns:
+

JSON string

+
+
+
+ +
+ +
+
+class openeo.internal.graph_building.PGNode(process_id, arguments=None, namespace=None, **kwargs)[source]
+

A process node in a process graph: has at least a process_id and arguments.

+

Note that a full openEO “process graph” is essentially a directed acyclic graph of nodes +pointing to each other. A full process graph is practically equivalent with its “result” node, +as it points (directly or indirectly) to all the other nodes it depends on.

+
+

Warning

+

This class is an implementation detail meant for internal use. +It is not recommended for general use in normal user code. +Instead, use process graph abstraction builders like +Connection.load_collection(), +Connection.datacube_from_process(), +Connection.datacube_from_flat_graph(), +Connection.datacube_from_json(), +Connection.load_ml_model(), +openeo.processes.process(),

+
+
+
+flat_graph()[source]
+

Get the process graph in internal flat dict representation.

+
+
Return type:
+

Dict[str, dict]

+
+
+
+ +
+
+static from_flat_graph(flat_graph, parameters=None)[source]
+

Unflatten a given flat dict representation of a process graph and return result node.

+
+
Return type:
+

PGNode

+
+
+
+ +
+
+to_dict()[source]
+

Convert process graph to a nested dictionary structure. +Uses deep copy style: nodes that are reused in graph will be deduplicated

+
+
Return type:
+

dict

+
+
+
+ +
+
+static to_process_graph_argument(value)[source]
+

Normalize given argument properly to a “process_graph” argument +to be used as reducer/subprocess for processes like +reduce_dimension, aggregate_spatial, apply, merge_cubes, resample_cube_temporal

+
+
Return type:
+

dict

+
+
+
+ +
+
+update_arguments(**kwargs)[source]
+

Add/Update arguments of the process node.

+
+

Added in version 0.10.1.

+
+
+ +
+
+walk_nodes()[source]
+

Walk this node and all its parents

+
+
Return type:
+

Iterator[PGNode]

+
+
+
+ +
+ +
+
+

Testing

+

Various utilities for testing use cases (unit tests, integration tests, benchmarking, …)

+
+

openeo.testing

+

Utilities for testing of openEO client workflows.

+
+
+class openeo.testing.TestDataLoader(root)[source]
+

Helper to resolve paths to test data files, load them as JSON, optionally preprocess them, etc.

+

It’s intended to be used as a pytest fixture, e.g. from conftest.py:

+
@pytest.fixture
+def test_data() -> TestDataLoader:
+    return TestDataLoader(root=Path(__file__).parent / "data")
+
+
+
+

Added in version 0.30.0.

+
+
+
+get_path(filename)[source]
+

Get absolute path to a test data file

+
+
Return type:
+

Path

+
+
+
+ +
+
+load_json(filename, preprocess=None)[source]
+

Parse data from a test JSON file

+
+
Return type:
+

dict

+
+
+
+ +
+ +
+
+

openeo.testing.results

+

Assert functions for comparing actual (batch job) results against expected reference data.

+
+
+openeo.testing.results.assert_job_results_allclose(actual, expected, *, rtol=1e-06, atol=1e-06, tmp_path=None)[source]
+

Assert that two job results sets are equal (with tolerance).

+
+
Parameters:
+
    +
  • actual (Union[BatchJob, JobResults, str, Path]) – actual job results, provided as BatchJob object, +JobResults() object or path to directory with downloaded assets.

  • +
  • expected (Union[BatchJob, JobResults, str, Path]) – expected job results, provided as BatchJob object, +JobResults() object or path to directory with downloaded assets.

  • +
  • rtol (float) – relative tolerance

  • +
  • atol (float) – absolute tolerance

  • +
  • tmp_path (Optional[Path]) – root temp path to download results if needed. +It’s recommended to pass pytest’s tmp_path fixture here

  • +
+
+
Raises:
+

AssertionError – if not equal within the given tolerance

+
+
+
+

Added in version 0.31.0.

+
+
+

Warning

+

This function is experimental and subject to change.

+
+
+ +
+
+openeo.testing.results.assert_xarray_allclose(actual, expected, *, rtol=1e-06, atol=1e-06)[source]
+

Assert that two Xarray DataSet or DataArray instances are equal (with tolerance).

+
+
Parameters:
+
    +
  • actual (Union[Dataset, DataArray, str, Path]) – actual data, provided as Xarray object or path to NetCDF/GeoTIFF file.

  • +
  • expected (Union[Dataset, DataArray, str, Path]) – expected or reference data, provided as Xarray object or path to NetCDF/GeoTIFF file.

  • +
  • rtol (float) – relative tolerance

  • +
  • atol (float) – absolute tolerance

  • +
+
+
Raises:
+

AssertionError – if not equal within the given tolerance

+
+
+
+

Added in version 0.31.0.

+
+
+

Warning

+

This function is experimental and subject to change.

+
+
+ +
+
+openeo.testing.results.assert_xarray_dataarray_allclose(actual, expected, *, rtol=1e-06, atol=1e-06)[source]
+

Assert that two Xarray DataArray instances are equal (with tolerance).

+
+
Parameters:
+
    +
  • actual (Union[DataArray, str, Path]) – actual data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file.

  • +
  • expected (Union[DataArray, str, Path]) – expected or reference data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file.

  • +
  • rtol (float) – relative tolerance

  • +
  • atol (float) – absolute tolerance

  • +
+
+
Raises:
+

AssertionError – if not equal within the given tolerance

+
+
+
+

Added in version 0.31.0.

+
+
+

Warning

+

This function is experimental and subject to change.

+
+
+ +
+
+openeo.testing.results.assert_xarray_dataset_allclose(actual, expected, *, rtol=1e-06, atol=1e-06)[source]
+

Assert that two Xarray DataSet instances are equal (with tolerance).

+
+
Parameters:
+
    +
  • actual (Union[Dataset, str, Path]) – actual data, provided as Xarray Dataset object or path to NetCDF/GeoTIFF file

  • +
  • expected (Union[Dataset, str, Path]) – expected or reference data, provided as Xarray Dataset object or path to NetCDF/GeoTIFF file.

  • +
  • rtol (float) – relative tolerance

  • +
  • atol (float) – absolute tolerance

  • +
+
+
Raises:
+

AssertionError – if not equal within the given tolerance

+
+
+
+

Added in version 0.31.0.

+
+
+

Warning

+

This function is experimental and subject to change.

+
+
+ +
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/auth.html b/auth.html new file mode 100644 index 000000000..58a07fa12 --- /dev/null +++ b/auth.html @@ -0,0 +1,665 @@ + + + + + + + + Authentication and Account Management — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Authentication and Account Management

+

While a couple of openEO operations can be done +anonymously, most of the interesting parts +of the API require you to identify as a registered +user. +The openEO API specifies two ways to authenticate +as a user:

+
    +
  • OpenID Connect (recommended, but not always straightforward to use)

  • +
  • Basic HTTP Authentication (not recommended, but practically easier in some situations)

  • +
+

To illustrate how to authenticate with the openEO Python Client Library, +we start from a back-end connection:

+
import openeo
+
+connection = openeo.connect("https://openeo.example.com")
+
+
+
+

Basic HTTP Auth

+

Let’s start with the easiest authentication method, +based on the Basic HTTP authentication scheme. +It is however not recommended for various reasons, +such as its limited security measures. +For example, if you are connecting to a back-end with a http:// URL +instead of a https:// one, you should certainly not use basic HTTP auth.

+

With these security related caveats out of the way, you authenticate +using your username and password like this:

+
connection.authenticate_basic("john", "j0hn123")
+
+
+

Subsequent usage of the connection object connection will +use authenticated calls. +For example, show information about the authenticated user:

+
>>> connection.describe_account()
+{'user_id': 'john'}
+
+
+
+
+

OpenID Connect Based Authentication

+

OpenID Connect (often abbreviated “OIDC”) is an identity layer on top of the OAuth 2.0 protocol. +An in-depth discussion of the whole architecture would lead us too far here, +but some central OpenID Connect concepts are quite useful to understand +in the context of working with openEO:

+
    +
  • There is decoupling between:

    +
      +
    • the OpenID Connect identity provider +which handles the authentication/authorization and stores user information +(e.g. an organization like Google, GitHub, Microsoft, your academic/research institution, …)

    • +
    • the openEO back-end which manages earth observation collections +and executes your algorithms

    • +
    +

    Instead of managing the authentication procedure itself, +an openEO back-end forwards a user to the relevant OpenID Connect provider to authenticate +and request access to basic profile information (e.g. email address). +On return, when the user allowed this access, +the openEO back-end receives the profile information and uses this to identify the user.

    +

    Note that with this approach, the back-end does not have to +take care of all the security and privacy challenges +of properly handling user registration, passwords/authentication, etc. +Also, it allows the user to securely reuse an existing account +registered with an established organisation, instead of having +to register yet another account with some web service.

    +
  • +
  • Your openEO script or application acts as +a so called OpenID Connect client, with an associated client id. +In most cases, a default client (id) defined by the openEO back-end will be used automatically. +For some applications a custom client might be necessary, +but this is out of scope of this documentation.

  • +
  • OpenID Connect authentication can be done with different kinds of “flows” (also called “grants”) +and picking the right flow depends on your specific use case. +The most common OIDC flows using the openEO Python Client Library are:

    + +
  • +
+

OpenID Connect is clearly more complex than Basic HTTP Auth. +In the sections below we will discuss the practical details of each flow.

+
+

General options

+
    +
  • A back-end might support multiple OpenID Connect providers. +The openEO Python Client Library will pick the first one by default, +but another provider can be specified explicitly with the provider_id argument, e.g.:

    +
    connection.authenticate_oidc_device(
    +    provider_id="gl",
    +    ...
    +)
    +
    +
    +
  • +
+
+
+
+

OIDC Authentication: Device Code Flow

+

The device code flow (also called device authorization grant) +is an interactive flow that requires a web browser for the authentication +with the OpenID Connect provider. +The nice thing is that the browser doesn’t have to run on +the same system or network as where you run your application, +you could even use a browser on your mobile phone.

+

Use authenticate_oidc_device() to initiate the flow:

+
connection.authenticate_oidc_device()
+
+
+

This will print a message like this:

+
Visit https://oidc.example.net/device
+and enter user code 'DTNY-KLNX' to authenticate.
+
+
+

Some OpenID Connect Providers use a slightly longer URL that already includes +the user code, and then you don’t need to enter the user code in one of the next steps:

+
Visit https://oidc.example.net/device?user_code=DTNY-KLNX to authenticate.
+
+
+

You should now visit this URL in your browser of choice. +Usually, it is intentionally a short URL to make it feasible to type it +instead of copy-pasting it (e.g. on another device).

+

Authenticate with the OpenID Connect provider and, if requested, enter the user code +shown in the message. +When the URL already contains the user code, the page won’t ask for this code.

+

Meanwhile, the openEO Python Client Library is actively polling the OpenID Connect +provider and when you successfully complete the authentication, +it will receive the necessary tokens for authenticated communication +with the back-end and print:

+
Authorized successfully.
+
+
+

In case of authentication failure, the openEO Python Client Library +will stop polling at some point and raise an exception.

+
+
+

OIDC Authentication: Refresh Token Flow

+

When OpenID Connect authentication completes successfully, +the openEO Python Client Library receives an access token +to be used when doing authenticated calls to the back-end. +The access token usually has a short lifetime to reduce +the security risk when it would be stolen or intercepted. +The openEO Python Client Library also receives a refresh token +that can be used, through the Refresh Token flow, +to easily request a new access token, +without having to re-authenticate, +which makes it useful for non-interactive use cases.

+

However, as it needs an existing refresh token, +the Refresh Token Flow requires +first to authenticate with one of the other flows +(but in practice this should not be done very often +because refresh tokens usually have a relatively long lifetime). +When doing the initial authentication, +you have to explicitly enable storage of the refresh token, +through the store_refresh_token argument, e.g.:

+
connection.authenticate_oidc_device(
+    ...
+    store_refresh_token=True
+)
+
+
+

The refresh token will be stored in a private file +in your home directory and will be used automatically +when authenticating with the Refresh Token Flow, +using authenticate_oidc_refresh_token():

+
connection.authenticate_oidc_refresh_token()
+
+
+

You can also bootstrap the refresh token file +as described in OpenID Connect refresh tokens

+
+
+

OIDC Authentication: Client Credentials Flow

+

The OIDC Client Credentials flow does not involve interactive authentication (e.g. through a web browser), +which makes it a useful option for non-interactive use cases.

+
+

Important

+

This method requires a custom OIDC client id and client secret. +It is out of scope of this general documentation to explain +how to obtain these as it depends on the openEO back-end you are using +and the OIDC provider that is in play.

+

Also, your openEO back-end might not allow it, because technically +you are authenticating a client instead of a user.

+

Consult the support of the openEO back-end you want to use for more information.

+
+

In its most simple form, given your client id and secret, +you can authenticate with +authenticate_oidc_client_credentials() +as follows:

+
connection.authenticate_oidc_client_credentials(
+    client_id=client_id,
+    client_secret=client_secret,
+)
+
+
+

You might also have to pass a custom provider id (argument provider_id) +if your OIDC client is associated with an OIDC provider that is different from the default provider.

+
+

Caution

+

Make sure to keep the client secret a secret and avoid putting it directly in your source code +or, worse, committing it to a version control system. +Instead, fetch the secret from a protected source (e.g. a protected file, a database for sensitive data, …) +or from environment variables.

+
+
+

OIDC Client Credentials Using Environment Variables

+

Since version 0.18.0, the openEO Python Client Library has built-in support to get the client id, +secret (and provider id) from environment variables +OPENEO_AUTH_CLIENT_ID, OPENEO_AUTH_CLIENT_SECRET and OPENEO_AUTH_PROVIDER_ID respectively. +Just call authenticate_oidc_client_credentials() +without arguments.

+

Usage example assuming a Linux (Bash) shell context:

+
$ export OPENEO_AUTH_CLIENT_ID="my-client-id"
+$ export OPENEO_AUTH_CLIENT_SECRET="Cl13n7S3cr3t!?123"
+$ export OPENEO_AUTH_PROVIDER_ID="oidcprovider"
+$ python
+>>> import openeo
+>>> connection = openeo.connect("openeo.example.com")
+>>> connection.authenticate_oidc_client_credentials()
+<Connection to 'https://openeo.example.com/openeo/1.1/' with OidcBearerAuth>
+
+
+
+
+
+

OIDC Authentication: Dynamic Method Selection

+

The sections above discuss various authentication options, like +the device code flow, +refresh tokens and +client credentials flow, +but often you want to dynamically switch between these depending on the situation: +e.g. use a refresh token if you have an active one, and fall back on the device code flow otherwise. +Or you want to be able to run the same code in an interactive environment and automated in an unattended manner, +without having to switch authentication methods explicitly in code.

+

That is what Connection.authenticate_oidc() is for:

+
connection.authenticate_oidc() # is all you need
+
+
+

In a basic situation (without any particular environment variables set as discussed further), +this method will first try to authenticate with refresh tokens (if any) +and fall back on the device code flow otherwise. +Ideally, when valid refresh tokens are available, this works without interaction, +but occasionally, when the refresh tokens expire, one has to do the interactive device code flow.

+

Since version 0.18.0, the openEO Python Client Library also allows triggering the +client credentials flow +from authenticate_oidc() +by setting environment variable OPENEO_AUTH_METHOD +and the other client credentials environment variables. +For example:

+
$ export OPENEO_AUTH_METHOD="client_credentials"
+$ export OPENEO_AUTH_CLIENT_ID="my-client-id"
+$ export OPENEO_AUTH_CLIENT_SECRET="Cl13n7S3cr3t!?123"
+$ export OPENEO_AUTH_PROVIDER_ID="oidcprovider"
+$ python
+>>> import openeo
+>>> connection = openeo.connect("openeo.example.com")
+>>> connection.authenticate_oidc()
+<Connection to 'https://openeo.example.com/openeo/1.1/' with OidcBearerAuth>
+
+
+
+
+

Auth config files and openeo-auth helper tool

+

The openEO Python Client Library provides some features and tools +that ease the usability and security challenges +that come with authentication (especially in case of OpenID Connect).

+

Note that the code examples above contain quite some passwords and other secrets +that should be kept safe from prying eyes. +It is bad practice to define these kinds of secrets directly +in your scripts and source code because that makes it quite hard +to responsibly share or reuse your code. +Even worse is storing these secrets in your version control system, +where it might be near impossible to remove them again. +A better solution is to keep secrets in separate configuration or cache files, +outside of your normal source code tree +(to avoid committing them accidentally).

+

The openEO Python Client Library supports config files to store: +user names, passwords, client IDs, client secrets, etc, +so you don’t have to specify them always in your scripts and applications.

+

The openEO Python Client Library (when installed properly) +provides a command line tool openeo-auth to bootstrap and manage +these configs and secrets. +It is a command line tool that provides various “subcommands” +and has built-in help:

+
$ openeo-auth -h
+usage: openeo-auth [-h] [--verbose]
+                   {paths,config-dump,token-dump,add-basic,add-oidc,oidc-auth}
+                   ...
+
+Tool to manage openEO related authentication and configuration.
+
+optional arguments:
+  -h, --help            show this help message and exit
+
+Subcommands:
+  {paths,config-dump,token-dump,add-basic,add-oidc,oidc-auth}
+    paths               Show paths to config/token files.
+    config-dump         Dump config file.
+...
+
+
+

For example, to see the expected paths of the config files:

+
$ openeo-auth paths
+openEO auth config: /home/john/.config/openeo-python-client/auth-config.json (perms: 0o600, size: 1414B)
+openEO OpenID Connect refresh token store: /home/john/.local/share/openeo-python-client/refresh-tokens.json (perms: 0o600, size: 846B)
+
+
+

With the config-dump and token-dump subcommands you can dump +the current configuration and stored refresh tokens, e.g.:

+
$ openeo-auth config-dump
+### /home/john/.config/openeo-python-client/auth-config.json ###############
+{
+  "backends": {
+    "https://openeo.example.com": {
+      "basic": {
+        "username": "john",
+        "password": "<redacted>",
+        "date": "2020-07-24T13:40:50Z"
+...
+
+
+

The sensitive information (like passwords) is redacted by default.

+
+

Basic HTTP Auth config

+

With the add-basic subcommand you can add Basic HTTP Auth credentials +for a given back-end to the config. +It will interactively ask for username and password and +check whether these credentials work:

+
$ openeo-auth add-basic https://openeo.example.com/
+Enter username and press enter: john
+Enter password and press enter:
+Trying to authenticate with 'https://openeo.example.com'
+Successfully authenticated 'john'
+Saved credentials to '/home/john/.config/openeo-python-client/auth-config.json'
+
+
+

Now you can authenticate in your application without having to +specify username and password explicitly:

+
connection.authenticate_basic()
+
+
+
+
+

OpenID Connect configs

+

Likewise, with the add-oidc subcommand you can add OpenID Connect +credentials to the config:

+
$ openeo-auth add-oidc https://openeo.example.com/
+Using provider ID 'example' (issuer 'https://oidc.example.net/')
+Enter client_id and press enter: client-d7393fba
+Enter client_secret and press enter:
+Saved client information to '/home/john/.config/openeo-python-client/auth-config.json'
+
+
+

Now you can use OpenID Connect based authentication in your application +without having to specify the client ID and client secret explicitly, +like one of these calls:

+
connection.authenticate_oidc_authorization_code()
+connection.authenticate_oidc_client_credentials()
+connection.authenticate_oidc_resource_owner_password_credentials(username=username, password=password)
+connection.authenticate_oidc_device()
+connection.authenticate_oidc_refresh_token()
+
+
+

Note that you still have to add additional options as required, like +provider_id, server_address, store_refresh_token, etc.

+
+

OpenID Connect refresh tokens

+

There is also an oidc-auth subcommand to execute an OpenID Connect +authentication flow and store the resulting refresh token. +This is intended for bootstrapping the environment or system +on which you want to run openEO scripts or applications that use +the Refresh Token Flow for authentication. +For example:

+
$ openeo-auth oidc-auth https://openeo.example.com
+Using config '/home/john/.config/openeo-python-client/auth-config.json'.
+Starting OpenID Connect device flow.
+To authenticate: visit https://oidc.example.net/device and enter the user code 'Q7ZNsy'.
+Authorized successfully.
+The OpenID Connect device flow was successful.
+Stored refresh token in '/home/john/.local/share/openeo-python-client/refresh-tokens.json'
+
+
+
+
+
+
+

Default openEO back-end URL and auto-authentication

+
+

Added in version 0.10.0.

+
+

If you often use the same openEO back-end URL and authentication scheme, +it can be handy to put these in a configuration file as discussed at Configuration files.

+
+

Note

+

Note that these general configuration files are different +from the auth config files discussed earlier under Auth config files and openeo-auth helper tool. +The latter are for storing authentication related secrets +and are mostly managed automatically (e.g. by the oidc-auth helper tool). +The former are not for storing secrets and are usually edited manually.

+
+

For example, to define a default back-end and automatically use OpenID Connect authentication +add these configuration options to the desired configuration file:

+
[Connection]
+default_backend = openeo.cloud
+default_backend.auto_authenticate = oidc
+
+
+

Getting an authenticated connection is now as simple as:

+
>>> import openeo
+>>> connection = openeo.connect()
+Loaded openEO client config from openeo-client-config.ini
+Using default back-end URL 'openeo.cloud' (from config)
+Doing auto-authentication 'oidc' (from config)
+Authenticated using refresh token.
+
+
+
+
+

Authentication for long-running applications and non-interactive contexts

+

With OpenID Connect authentication, the access token +(which is used in the authentication headers) +is typically short-lived (e.g. a couple of minutes or hours). +This practically means that an authenticated connection could expire and become unusable +before a long-running script or application finishes its whole workflow. +Luckily, OpenID Connect also includes usage of refresh tokens, +which have a much longer expiry and allow requesting a new access token +to re-authenticate the connection. +Since version 0.10.1, the openEO Python Client Library will automatically +attempt to re-authenticate a connection when access token expiry is detected +and valid refresh tokens are available.

+

Likewise, refresh tokens can also be used for authentication in cases +where a script or application is run automatically in the background on a regular basis (daily, weekly, …). +If there is a non-expired refresh token available, the script can authenticate +without user interaction.

+
+

Guidelines and tips

+

Some guidelines to get long-term and non-interactive authentication working for your use case:

+
    +
  • If you run a workflow periodically, but the interval between runs +is larger than the expiry time of the refresh token +(e.g. a monthly job, while the refresh token expires after, say, 10 days), +you could consider setting up a custom OIDC client with better suited +refresh token timeout. +The practical details of this heavily depend on the OIDC Identity Provider +in play and are out of scope of this discussion.

  • +
  • Obtaining a refresh token requires manual/interactive authentication, +but once it is stored on the necessary machine(s) +in the refresh token store as discussed in Auth config files and openeo-auth helper tool, +no further manual interaction should be necessary +during the lifetime of the refresh token. +To do so, use one of the following methods:

    +
      +
    • Use the openeo-auth oidc-auth cli tool, for example to authenticate +for openeo back-end openeo.example.com:

      +
      $ openeo-auth oidc-auth openeo.example.com
      +...
      +Stored refresh token in '/home/john/.local/share/openeo-python-client/refresh-tokens.json'
      +
      +
      +
    • +
    • Use a Python snippet to authenticate and store the refresh token:

      +
      import openeo
      +connection = openeo.connect("openeo.example.com")
      +connection.authenticate_oidc_device(store_refresh_token=True)
      +
      +
      +
    • +
    +

    To verify that (and where) the refresh token is stored, use openeo-auth token-dump:

    +
    $ openeo-auth token-dump
    +### /home/john/.local/share/openeo-python-client/refresh-tokens.json #######
    +{
    +  "https://oidc.example.net": {
    +    "default-client": {
    +      "date": "2022-05-11T13:13:20Z",
    +      "refresh_token": "<redacted>"
    +    },
    +...
    +
    +
    +
  • +
+
+
+
+

Best Practices and Troubleshooting Tips

+
+

Warning

+

Handle (OIDC) access and refresh tokens like secret, personal passwords. +Never share your access or refresh tokens with other people, +publicly, or for user support reasons.

+
+
+

Clear the refresh token file

+

When you have authentication or permission issues and you suspect +that your (locally cached) refresh tokens are the culprit: +remove your refresh token file in one of the following ways:

+
    +
  • Locate the file with the openeo-auth command line tool:

    +
    $ openeo-auth paths
    +...
    +openEO OpenID Connect refresh token store: /home/john/.local/share/openeo-python-client/refresh-tokens.json (perms: 0o600, size: 846B)
    +
    +
    +

    and remove it. +Or, if you know what you are doing: remove the desired section from this JSON file.

    +
  • +
  • Remove it directly with the token-clear subcommand of the openeo-auth command line tool:

    +
    $ openeo-auth token-clear
    +
    +
    +
  • +
  • Remove it with this Python snippet:

    +
    from openeo.rest.auth.config import RefreshTokenStore
    +RefreshTokenStore().remove()
    +
    +
    +
  • +
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/basics.html b/basics.html new file mode 100644 index 000000000..ecb3999b1 --- /dev/null +++ b/basics.html @@ -0,0 +1,527 @@ + + + + + + + + Getting Started — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Getting Started

+
+

Connect to an openEO back-end

+

First, establish a connection to an openEO back-end, using its connection URL. +For example the VITO/Terrascope backend:

+
import openeo
+
+connection = openeo.connect("openeo.vito.be")
+
+
+

The resulting Connection object is your central gateway to

+
    +
  • list data collections, available processes, file formats and other capabilities of the back-end

  • +
  • start building your openEO algorithm from the desired data on the back-end

  • +
  • execute and monitor (batch) jobs on the back-end

  • +
  • etc.

  • +
+
+

See also

+

Use the openEO Hub to explore different back-end options +and their capabilities in a web-based way.

+
+
+
+

Collection discovery

+

The Earth observation data (the input of your openEO jobs) is organised in +so-called collections, +e.g. fundamental satellite collections like “Sentinel 1” or “Sentinel 2”, +or preprocessed collections like “NDVI”.

+

You can programmatically list the collections that are available on a back-end +and their metadata using methods on the connection object we just created +(like list_collection_ids() +or describe_collection()):

+
>>> # Get all collection ids
+>>> connection.list_collection_ids()
+['SENTINEL1_GRD', 'SENTINEL2_L2A', ...
+
+>>> # Get metadata of a single collection
+>>> connection.describe_collection("SENTINEL2_L2A")
+{'id': 'SENTINEL2_L2A', 'title': 'Sentinel-2 top of canopy ...', 'stac_version': '0.9.0', ...
+
+
+

Congrats, you now just did your first real openEO queries to the openEO back-end +using the openEO Python client library.

+
+

Tip

+

The openEO Python client library comes with Jupyter (notebook) integration in a couple of places. +For example, put connection.describe_collection("SENTINEL2_L2A") (without print()) +as last statement in a notebook cell +and you’ll get a nice graphical rendering of the collection metadata.

+
+
+

See also

+

Find out more about data discovery, loading and filtering at Finding and loading data.

+
+
+
+

Authentication

+

In the code snippets above we did not need to log in as a user +since we just queried publicly available back-end information. +However, to run non-trivial processing queries one has to authenticate +so that permissions, resource usage, etc. can be managed properly.

+

To handle authentication, openEO leverages OpenID Connect (OIDC). +It offers some interesting features (e.g. a user can securely reuse an existing account), +but is a fairly complex topic, discussed in more depth at Authentication and Account Management.

+

The openEO Python client library tries to make authentication as streamlined as possible. +In most cases for example, the following snippet is enough to obtain an authenticated connection:

+
import openeo
+
+connection = openeo.connect("openeo.vito.be").authenticate_oidc()
+
+
+

This statement will automatically reuse a previously authenticated session, when available. +Otherwise, e.g. the first time you do this, some user interaction is required +and it will print a web link and a short user code, for example:

+
To authenticate: visit https://aai.egi.eu/auth/realms/egi/device and enter the user code 'SLUO-BMUD'.
+
+
+

Visit this web page in a browser, log in there with an existing account and enter the user code. +If everything goes well, the connection object in the script will be authenticated +and the back-end will be able to identify you in subsequent requests.

+
+
+

Example use case: EVI map and timeseries

+

A common task in earth observation is to apply a formula to a number of spectral bands +in order to compute an ‘index’, such as NDVI, NDWI, EVI, … +In this tutorial we’ll go through a couple of steps to extract +EVI (enhanced vegetation index) values and timeseries, +and discuss some openEO concepts along the way.

+
+
+

Loading an initial data cube

+

For calculating the EVI, we need the reflectance of the +red, blue and (near) infrared spectral components. +These spectral bands are part of the well-known Sentinel-2 data set +and are available on the current back-end under collection id SENTINEL2_L2A. +We load an initial small spatio-temporal slice (a data cube) as follows:

+
sentinel2_cube = connection.load_collection(
+    "SENTINEL2_L2A",
+    spatial_extent={"west": 5.14, "south": 51.17, "east": 5.17, "north": 51.19},
+    temporal_extent = ["2021-02-01", "2021-04-30"],
+    bands=["B02", "B04", "B08"]
+)
+
+
+

Note how we specify the region of interest, a time range and a set of bands to load.

+
+

Important

+

By filtering as early as possible (directly in load_collection() in this case), +we make sure the back-end only loads the data we are interested in +for better performance and keeping the processing costs low.

+
+
+

See also

+

See the chapter Finding and loading data for more details on data discovery, +general data loading (Loading a data cube from a collection) and filtering +(e.g. Filter on temporal extent).

+
+

The load_collection() method on the connection +object created a DataCube object (variable sentinel2_cube). +This DataCube class of the openEO Python Client Library +provides loads of methods corresponding to various openEO processes, +e.g. for masking, filtering, aggregation, spectral index calculation, data fusion, etc. +In the next steps we will illustrate a couple of these.

+
+

Important

+

It is important to highlight that we did not load any real EO data yet. +Instead we just created an abstract client-side reference, +encapsulating the collection id, the spatial extent, the temporal extent, etc. +The actual data loading will only happen at the back-end +once we explicitly trigger the execution of the data processing pipeline we are building.

+
+
+
+

Band math

+

From this data cube, we can now select the individual bands +with the DataCube.band() method +and rescale the digital number values to physical reflectances:

+
blue = sentinel2_cube.band("B02") * 0.0001
+red = sentinel2_cube.band("B04") * 0.0001
+nir = sentinel2_cube.band("B08") * 0.0001
+
+
+

We now want to compute the enhanced vegetation index +and can do that directly with these band variables:

+
evi_cube = 2.5 * (nir - red) / (nir + 6.0 * red - 7.5 * blue + 1.0)
+
+
+
+

Important

+

As noted before: while this looks like an actual calculation, +there is no real data processing going on here. +The evi_cube object at this point is just an abstract representation +of our algorithm under construction. +The mathematical operators we used here are syntactic sugar +for expressing this part of the algorithm in a very compact way.

+

As an illustration of this, let’s have a peek at the JSON representation +of our algorithm so far, the so-called openEO process graph:

+
>>> print(evi_cube.to_json(indent=None))
+{"process_graph": {"loadcollection1": {"process_id": "load_collection", ...
+... "id": "SENTINEL2_L2A", "spatial_extent": {"west": 5.14, "south": ...
+... "multiply1": { ... "y": 0.0001}}, ...
+... "multiply3": { ... {"x": 2.5, "y": {"from_node": "subtract1"}}} ...
+...
+
+
+

Note how the load_collection arguments, rescaling and EVI calculation aspects +can be deciphered from this. +Rest assured, as a user you normally don’t have to worry too much +about these process graph details, +the openEO Python Client library handles this behind the scenes for you.

+
+
+
+

Download (synchronously)

+

Let’s download this as a GeoTIFF file. +Because GeoTIFF does not support a temporal dimension, +we first eliminate it by taking the temporal maximum value for each pixel:

+
evi_composite = evi_cube.max_time()
+
+
+
+

Note

+

This max_time() is not an official openEO process +but one of the many convenience methods in the openEO Python Client Library +to simplify common processing patterns. +It implements a reduce operation along the temporal dimension +with a max reducer/aggregator.

+
+

Now we can download this to a local file:

+
evi_composite.download("evi-composite.tiff")
+
+
+

This download command triggers the actual processing on the back-end: +it sends the process graph to the back-end and waits for the result. +It is a synchronous operation (the download() call +blocks until the result is fully downloaded) and because we work on a small spatio-temporal extent, +this should only take a couple of seconds.

+

If we inspect the downloaded image, we see that the maximum EVI value is heavily impacted +by cloud related artefacts, which makes the result barely usable. +In the next steps we will address cloud masking.

+_images/evi-composite.png +
+
+

Batch Jobs (asynchronous execution)

+

Synchronous downloads are handy for quick experimentation on small data cubes, +but if you start processing larger data cubes, you can easily +hit computation time limits or other constraints. +For these larger tasks, it is recommended to work with batch jobs, +which allow you to work asynchronously: +after you start your job, you can disconnect (stop your script or even close your computer) +and then minutes/hours later you can reconnect to check the batch job status and download results. +The openEO Python Client Library also provides helpers to keep track of a running batch job +and show a progress report.

+
+

See also

+

See Batch Jobs for more details.

+
+
+
+

Applying a cloud mask

+

As mentioned above, we need to filter out cloud pixels to make the result more usable. +It is very common for earth observation data to have separate masking layers that for instance indicate +whether a pixel is covered by a (type of) cloud or not. +For Sentinel-2, one such layer is the “scene classification” layer generated by the Sen2Cor algorithm. +In this example, we will use this layer to mask out unwanted data.

+

First, we load a new SENTINEL2_L2A based data cube with this specific SCL band as single band:

+
s2_scl = connection.load_collection(
+    "SENTINEL2_L2A",
+    spatial_extent={"west": 5.14, "south": 51.17, "east": 5.17, "north": 51.19},
+    temporal_extent = ["2021-02-01", "2021-04-30"],
+    bands=["SCL"]
+)
+
+
+

Now we can use the compact “band math” feature again to build a +binary mask with a simple comparison operation:

+
# Select the "SCL" band from the data cube
+scl_band = s2_scl.band("SCL")
+# Build mask to mask out everything but class 4 (vegetation)
+mask = (scl_band != 4)
+
+
+

Before we can apply this mask to the EVI cube we have to resample it, +as the “SCL” layer has a “ground sample distance” of 20 meter, +while it is 10 meter for the “B02”, “B04” and “B08” bands. +We can easily do the resampling by referring directly to the EVI cube.

+
mask_resampled = mask.resample_cube_spatial(evi_cube)
+
+# Apply the mask to the `evi_cube`
+evi_cube_masked = evi_cube.mask(mask_resampled)
+
+
+

We can now download this as a GeoTIFF, again after taking the temporal maximum:

+
evi_cube_masked.max_time().download("evi-masked-composite.tiff")
+
+
+

Now, the EVI map is a lot more valuable, as the non-vegetation locations +and observations are filtered out:

+_images/evi-masked-composite.png +
+
+

Aggregated EVI timeseries

+

A common type of analysis is aggregating pixel values over one or more regions of interest +(also known as “zonal statistics”) and tracking this aggregation over a period of time as a timeseries. +Let’s extract the EVI timeseries for these two regions:

+
features = {"type": "FeatureCollection", "features": [
+    {
+        "type": "Feature", "properties": {},
+        "geometry": {"type": "Polygon", "coordinates": [[
+            [5.1417, 51.1785], [5.1414, 51.1772], [5.1444, 51.1768], [5.1443, 51.179], [5.1417, 51.1785]
+        ]]}
+    },
+    {
+        "type": "Feature", "properties": {},
+        "geometry": {"type": "Polygon", "coordinates": [[
+            [5.156, 51.1892], [5.155, 51.1855], [5.163, 51.1855], [5.163, 51.1891], [5.156, 51.1892]
+        ]]}
+    }
+]}
+
+
+
+

Note

+

To have a self-contained example we define the geometries here as an inline GeoJSON-style dictionary. +In a real use case, your geometry will probably come from a local file or remote URL. +The openEO Python Client Library supports alternative ways of specifying the geometry +in methods like aggregate_spatial(), e.g. +as Shapely geometry objects.

+
+

Building on the experience from previous sections, we first build a masked EVI cube +(covering a longer time window than before):

+
# Load raw collection data
+sentinel2_cube = connection.load_collection(
+    "SENTINEL2_L2A",
+    spatial_extent={"west": 5.14, "south": 51.17, "east": 5.17, "north": 51.19},
+    temporal_extent = ["2020-01-01", "2021-12-31"],
+    bands=["B02", "B04", "B08", "SCL"],
+)
+
+# Extract spectral bands and calculate EVI with the "band math" feature
+blue = sentinel2_cube.band("B02") * 0.0001
+red = sentinel2_cube.band("B04") * 0.0001
+nir = sentinel2_cube.band("B08") * 0.0001
+evi = 2.5 * (nir - red) / (nir + 6.0 * red - 7.5 * blue + 1.0)
+
+# Use the scene classification layer to mask out non-vegetation pixels
+scl = sentinel2_cube.band("SCL")
+evi_masked = evi.mask(scl != 4)
+
+
+

Now we use the aggregate_spatial() method +to do spatial aggregation over the geometries we defined earlier. +Note how we can specify the aggregation function "mean" as a simple string for the reducer argument.

+
evi_aggregation = evi_masked.aggregate_spatial(
+    geometries=features,
+    reducer="mean",
+)
+
+
+

If we download this, we get the timeseries encoded as a JSON structure, other useful formats are CSV and netCDF.

+
evi_aggregation.download("evi-aggregation.json")
+
+
+
+

Warning

+

Technically, the output of the openEO process aggregate_spatial +is a so-called “vector cube”. +At the time of this writing, the specification of this openEO concept +is not fully fleshed out yet in the openEO API. +openEO back-ends and clients try to provide best-effort support for it, +but bear in mind that some details are subject to change.

+
+

The openEO Python Client Library provides helper functions +to convert the downloaded JSON data to a pandas dataframe, +which we massage a bit more:

+
import json
+import pandas as pd
+from openeo.rest.conversions import timeseries_json_to_pandas
+
+import json
+with open("evi-aggregation.json") as f:
+    data = json.load(f)
+
+df = timeseries_json_to_pandas(data)
+df.index = pd.to_datetime(df.index)
+df = df.dropna()
+df.columns = ("Field A", "Field B")
+
+
+

This gives us finally our EVI timeseries dataframe:

+
>>> df
+                           Field A   Field B
+date
+2020-01-06 00:00:00+00:00  0.522499  0.300250
+2020-01-16 00:00:00+00:00  0.529591  0.288079
+2020-01-18 00:00:00+00:00  0.633011  0.327598
+...                             ...       ...
+
+
+_images/evi-timeseries.png +
+
+

Computing multiple statistics

+
+

Warning

+

This is an experimental feature of the GeoPySpark openEO back-end, +it may not be supported by other back-ends, +and is subject to change. +See Open-EO/openeo-geopyspark-driver#726 for further discussion.

+
+

The same method also allows the computation of multiple statistics at once. This does rely +on ‘callbacks’ to construct a result with multiple statistics. +The use of such more complex processes is further explained in Processes with child “callbacks”.

+
from openeo.processes import array_create, mean, sd, median, count
+
+evi_aggregation = evi_masked.aggregate_spatial(
+    geometries=features,
+    reducer=lambda x: array_create([mean(x), sd(x), median(x), count(x)]),
+)
+
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/batch_jobs.html b/batch_jobs.html new file mode 100644 index 000000000..15add270f --- /dev/null +++ b/batch_jobs.html @@ -0,0 +1,446 @@ + + + + + + + + Batch Jobs — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Batch Jobs

+

Most of the simple, basic openEO usage examples show synchronous downloading of results: +you submit a process graph with a (HTTP POST) request and receive the result +as direct response of that same request. +This only works properly if the processing doesn’t take too long (order of seconds, or a couple of minutes at most).

+

For the heavier work (larger regions of interest, larger time series, more intensive processing, …) +you have to use batch jobs, which are supported in the openEO API through separate HTTP requests, corresponding to these steps:

+
    +
  • you create a job (providing a process graph and some other metadata like title, description, …)

  • +
  • you start the job

  • +
  • you wait for the job to finish, periodically polling its status

  • +
  • when the job finished successfully: get the listing of result assets

  • +
  • you download the result assets (or use them in another way)

  • +
+
+

Tip

+

This documentation mainly discusses how to programmatically +create and interact with batch jobs using the openEO Python client library. +The openEO API however does not enforce usage of the same tool +for each step in the batch job life cycle.

+

For example: if you prefer a graphical, web-based interactive environment +to manage and monitor your batch jobs, +feel free to switch to an openEO web editor +like editor.openeo.org +or editor.openeo.cloud +at any time. +After logging in with the same account you use in your Python scripts, +you should see your batch jobs listed under the “Data Processing” tab:

+_images/batchjobs-webeditor-listing.png +

With the “action” buttons on the right, you can for example +inspect batch job details, start/stop/delete jobs, +download their results, get batch job logs, etc.

+
+
+

Create a batch job

+

In the openEO Python Client Library, if you have a (raster) data cube, you can easily +create a batch job with the DataCube.create_job() method. +It’s important to specify in what format the result should be stored, +which can be done with an explicit DataCube.save_result() call before creating the job:

+
cube = connection.load_collection(...)
+...
+# Store raster data as GeoTIFF files
+cube = cube.save_result(format="GTiff")
+job = cube.create_job()
+
+
+

or directly in cube.create_job():

+
cube = connection.load_collection(...)
+...
+job = cube.create_job(out_format="GTiff")
+
+
+

While not necessary, it is also recommended to give your batch job a descriptive title +so it’s easier to identify in your job listing, e.g.:

+
job = cube.create_job(title="NDVI timeseries 2022")
+
+
+
+
+

Batch job object

+

The job object returned by create_job() +is a BatchJob object. +It is basically a client-side reference to a batch job that exists on the back-end +and allows you to interact with that batch job +(see the BatchJob API docs for +available methods).

+
+

Note

+

The BatchJob class originally had +the more cryptic name RESTJob, +which is still available as legacy alias, +but BatchJob is (available and) recommended since version 0.11.0.

+
+

A batch job on a back-end is fully identified by its +job_id:

+
>>> job.job_id
+'d5b8b8f2-74ce-4c2e-b06d-bff6f9b14b8d'
+
+
+
+

Reconnecting to a batch job

+

Depending on your situation or use case: +make sure to properly take note of the batch job id. +It allows you to “reconnect” to your job on the back-end, +even if it was created at another time, +by another script/notebook or even with another openEO client.

+

Given a back-end connection and the batch job id, +use Connection.job() +to create a BatchJob object for an existing batch job:

+
job_id = "5d806224-fe79-4a54-be04-90757893795b"
+job = connection.job(job_id)
+
+
+
+
+

Jupyter integration

+

BatchJob objects have basic Jupyter notebook integration. +Put your BatchJob object as last statement +in a notebook cell and you get an overview of your batch job, +including job id, status, title and even process graph visualization:

+_images/batchjobs-jupyter-created.png +
+
+
+

List your batch jobs

+

You can list your batch jobs on the back-end with +Connection.list_jobs(), which returns a list of job metadata:

+
>>> connection.list_jobs()
+[{'title': 'NDVI timeseries 2022', 'status': 'created', 'id': 'd5b8b8f2-74ce-4c2e-b06d-bff6f9b14b8d', 'created': '2022-06-08T08:58:11Z'},
+ {'title': 'NDVI timeseries 2021', 'status': 'finished', 'id': '4e720e70-88bd-40bc-92db-a366985ebd67', 'created': '2022-06-04T14:46:06Z'},
+ ...
+
+
+

The listing returned by Connection.list_jobs() +has Jupyter notebook integration:

+_images/batchjobs-jupyter-listing.png +
+
+

Run a batch job

+

Starting a batch job is pretty straightforward with the +start() method:

+
job.start()
+
+
+

If this didn’t raise any errors or exceptions your job +should now have started (status “running”) +or be queued for processing (status “queued”).

+
+

Wait for a batch job to finish

+

A batch job typically takes some time to finish, +and you can check its status with the status() method:

+
>>> job.status()
+"running"
+
+
+

The possible batch job status values, defined by the openEO API, are +“created”, “queued”, “running”, “canceled”, “finished” and “error”.

+

Usually, you can only reliably get results from your job, +as discussed in Download batch job results, +when it reaches status “finished”.

+
+
+

Create, start and wait in one go

+

You could, depending on your situation, manually check your job’s status periodically +or set up a polling loop system to keep an eye on your job. +The openEO Python client library also provides helpers to do that for you.

+

Working from an existing BatchJob instance

+
+

If you have a batch job that is already created as shown above, you can use +the job.start_and_wait() method +to start it and periodically poll its status until it reaches status “finished” (or fails with status “error”). +Along the way it will print some progress messages.

+
>>> job.start_and_wait()
+0:00:00 Job 'b0e8adcf-087f-41de-afe6-b3c0ea88ff38': send 'start'
+0:00:36 Job 'b0e8adcf-087f-41de-afe6-b3c0ea88ff38': queued (progress N/A)
+0:01:35 Job 'b0e8adcf-087f-41de-afe6-b3c0ea88ff38': queued (progress N/A)
+0:02:19 Job 'b0e8adcf-087f-41de-afe6-b3c0ea88ff38': running (progress N/A)
+0:02:50 Job 'b0e8adcf-087f-41de-afe6-b3c0ea88ff38': running (progress N/A)
+0:03:28 Job 'b0e8adcf-087f-41de-afe6-b3c0ea88ff38': finished (progress N/A)
+
+
+
+

Working from a DataCube instance

+
+

If you didn’t create the batch job yet from a given DataCube +you can do the job creation, starting and waiting in one go +with cube.execute_batch():

+
>>> job = cube.execute_batch()
+0:00:00 Job 'f9f4e3d3-bc13-441b-b76a-b7bfd3b59669': send 'start'
+0:00:23 Job 'f9f4e3d3-bc13-441b-b76a-b7bfd3b59669': queued (progress N/A)
+...
+
+
+

Note that cube.execute_batch() +returns a BatchJob instance pointing to +the newly created batch job.

+
+
+

Tip

+

You can fine-tune the details of the polling loop (the poll frequency, +how the progress is printed, …). +See job.start_and_wait() +or cube.execute_batch() +for more information.

+
+
+
+
+

Batch job logs

+

Batch jobs in openEO have logs to help with monitoring and debugging batch jobs. +The back-end typically uses this to dump information during data processing +that may be relevant for the user (e.g. warnings, resource stats, …). +Moreover, openEO processes like inspect allow users to log their own information.

+

Batch job logs can be fetched with job.logs():

+
>>> job.logs()
+[{'id': 'log001', 'level': 'info', 'message': 'Job started with 4 workers'},
+ {'id': 'log002', 'level': 'debug', 'message': 'Loading 5x3x6 tiles'},
+ {'id': 'log003', 'level': 'error', 'message': "Failed to load data cube: corrupt data for tile 'J9A7K2'."},
+...
+
+
+

In a Jupyter notebook environment, this also comes with Jupyter integration:

+_images/batchjobs-jupyter-logs.png +
+

Automatic batch job log printing

+

When using +job.start_and_wait() +or cube.execute_batch() +to run a batch job and it fails, +the openEO Python client library will automatically +print the batch job logs and instructions to help with further investigation:

+
>>> job.start_and_wait()
+0:00:00 Job '68caccff-54ee-470f-abaa-559ed2d4e53c': send 'start'
+0:00:01 Job '68caccff-54ee-470f-abaa-559ed2d4e53c': running (progress N/A)
+0:00:07 Job '68caccff-54ee-470f-abaa-559ed2d4e53c': error (progress N/A)
+
+Your batch job '68caccff-54ee-470f-abaa-559ed2d4e53c' failed.
+Logs can be inspected in an openEO (web) editor
+or with `connection.job('68caccff-54ee-470f-abaa-559ed2d4e53c').logs()`.
+
+Printing logs:
+[{'id': 'log001', 'level': 'info', 'message': 'Job started with 4 workers'},
+{'id': 'log002', 'level': 'debug', 'message': 'Loading 5x3x6 tiles'},
+{'id': 'log003', 'level': 'error', 'message': "Failed to load data cube: corrupt data for tile 'J9A7K2'."}]
+
+
+
+
+
+

Download batch job results

+

Once a batch job is finished you can get a handle to the results +(which can be a single file or multiple files) and metadata +with get_results():

+
>>> results = job.get_results()
+>>> results
+<JobResults for job '57da31da-7fd4-463a-9d7d-c9c51646b6a4'>
+
+
+

The result metadata describes the spatio-temporal properties of the result +and is in fact a valid STAC item:

+
>>> results.get_metadata()
+{
+    'bbox': [3.5, 51.0, 3.6, 51.1],
+    'geometry': {'coordinates': [[[3.5, 51.0], [3.5, 51.1], [3.6, 51.1], [3.6, 51.0], [3.5, 51.0]]], 'type': 'Polygon'},
+    'assets': {
+        'res001.tiff': {
+            'href': 'https://openeo.example/download/432f3b3ef3a.tiff',
+            'type': 'image/tiff; application=geotiff',
+            ...
+        'res002.tiff': {
+            ...
+
+
+
+

Download all assets

+

In the general case, when you have one or more result files (also called “assets”), +the easiest option to download them is +using download_files() (plural) +where you just specify a download folder +(otherwise the current working directory will be used by default):

+
results.download_files("data/out")
+
+
+

The resulting files will be named as they are advertised in the results metadata +(e.g. res001.tiff and res002.tiff in case of the metadata example above).

+
+
+

Download single asset

+

If you know that there is just a single result file, you can also download it directly with +download_file() (singular) with the desired file name:

+
results.download_file("data/out/result.tiff")
+
+
+

This will fail however if there are multiple assets in the job result +(like in the metadata example above). +In that case you can still download a single asset by specifying which one you +want to download with the name argument:

+
results.download_file("data/out/result.tiff", name="res002.tiff")
+
+
+
+
+

Fine-grained asset downloads

+

If you need a bit more control over which asset to download and how, +you can iterate over the result assets explicitly +and download these ResultAsset instances +with download(), like this:

+
for asset in results.get_assets():
+    if asset.metadata["type"].startswith("image/tiff"):
+        asset.download("data/out/result-v2-" + asset.name)
+
+
+
+
+
+

Directly load batch job results

+

If you want to skip downloading an asset to disk, you can also load it directly. +For example, load a JSON asset with load_json():

+
>>> asset.metadata
+{"type": "application/json", "href": "https://openeo.example/download/432f3b3ef3a.json"}
+>>> data = asset.load_json()
+>>> data
+{"2021-02-24T10:59:23Z": [[3, 2, 5], [3, 4, 5]], ....}
+
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/best_practices.html b/best_practices.html new file mode 100644 index 000000000..82ad7b4ac --- /dev/null +++ b/best_practices.html @@ -0,0 +1,213 @@ + + + + + + + + Best practices, coding style and general tips — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Best practices, coding style and general tips

+

This is a collection of guidelines regarding best practices, +coding style and usage patterns for the openEO Python Client Library.

+

It is in the first place an internal recommendation for openEO developers +to give documentation, code examples, demos and tutorials +a consistent look and feel, +following common software engineering best practices. +Secondly, the wider audience of openEO users is also invited to pick up +a couple of tips and principles to improve their own code and scripts.

+
+

Background and inspiration

+

While some people consider coding style a personal choice or even irrelevant, +there are various reasons to settle on certain conventions. +The mere fact of following conventions +makes it easier to get to the important details in someone else’s code faster. +Apart from taste, there are also technical reasons to pick certain rules +to streamline the programming workflow, +not only for humans, +but also for supporting tools (e.g. to minimize the risk of merge conflicts).

+

While the Python language already has a strong focus on readability by design, +the Python community is strongly gravitating to even more strict conventions:

+
    +
  • pep8: the mother of all Python code style guides

  • +
  • black: an opinionated code formatting tool +that gets more and more traction in popular, high profile projects.

  • +
+

This openEO oriented style guide will highlight +and build on these recommendations.

+
+
+

General code style recommendations

+
    +
  • Indentation with 4 spaces.

  • +
  • Avoid star imports (from module import *). +While this seems like a quick way to import a bunch of functions/classes, +it makes it very hard for the reader to figure out where things come from. +It can also lead to strange bugs and behavior because it silently overwrites +references you previously imported.

  • +
+
+
+

Line (length) management

+

While desktop monitors offer plenty of (horizontal) space nowadays, +it is still a common recommendation to avoid long source code lines. +Not only are long lines hard to read and understand, +one should also consider that source code might still be viewed +on a small screen or tight viewport, +where scrolling horizontally is annoying or even impossible. +Unnecessarily long lines are also notorious +for not playing well with version control tools and workflows.

+

Here are some guidelines on how to split long statements over multiple lines.

+

Split long function/method calls directly after the opening parenthesis +and list arguments with a standard 4 space indentation +(not after the first argument with some ad-hoc indentation). +Put the closing parenthesis on its own line.

+
# Avoid this:
+s2_fapar = connection.load_collection("TERRASCOPE_S2_FAPAR_V2",
+                                      spatial_extent={'west': 16.138916, 'east': 16.524124, 'south': 48.1386, 'north': 48.320647},
+                                      temporal_extent=["2020-05-01", "2020-05-20"])
+
+# This is better:
+s2_fapar = connection.load_collection(
+    "TERRASCOPE_S2_FAPAR_V2",
+    spatial_extent={"west": 16.138916, "east": 16.524124, "south": 48.1386, "north": 48.320647},
+    temporal_extent=["2020-05-01", "2020-05-20"],
+)
+
+
+
+
+

Jupyter(lab) tips and tricks

+
    +
  • Add a cell with openeo.client_version() (e.g. just after importing all your libraries) +to keep track of which version of the openeo Python client library you used in your notebook.

  • +
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/changelog.html b/changelog.html new file mode 100644 index 000000000..c8df3f228 --- /dev/null +++ b/changelog.html @@ -0,0 +1,1381 @@ + + + + + + + + Changelog — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Changelog

+

All notable changes to this project will be documented in this file.

+

The format is based on Keep a Changelog, +and this project adheres to Semantic Versioning.

+
+

[Unreleased]

+
+

Added

+
+
+

Changed

+
+
+

Removed

+
+
+

Fixed

+
+
+
+

[0.36.0] - 2024-12-10

+
+

Added

+
    +
  • Automatically use load_url when providing a URL as geometries to DataCube.aggregate_spatial(), DataCube.mask_polygon(), etc. (#104, #457)

  • +
  • Allow specifying limit when listing batch jobs with Connection.list_jobs() (#677)

  • +
  • Add additional and job_options arguments to Connection.download(), Datacube.download() and related (#681)

  • +
+
+
+

Changed

+
    +
  • MultiBackendJobManager: costs has been added as a column in tracking databases ([#588])

  • +
  • When passing a path/string as geometry to DataCube.aggregate_spatial(), DataCube.mask_polygon(), etc.: +this is not translated automatically anymore to deprecated, non-standard read_vector usage. +Instead, if it is a local GeoJSON file, the GeoJSON data will be loaded directly client-side. +(#104, #457)

  • +
  • Move read() method from general JobDatabaseInterface to more specific FullDataFrameJobDatabase (#680)

  • +
  • Align additional and job_options arguments in Connection.create_job(), DataCube.create_job() and related. +Also, follow official spec more closely. (#683, Open-EO/openeo-api#276)

  • +
+
+
+

Fixed

+
    +
  • load_stac: use fallback temporal dimension when no “cube:dimensions” in STAC Collection (#666)

  • +
  • Fix usage of Parameter.spatial_extent() with load_collection and filter_bbox (#676)

  • +
+
+
+
+

[0.35.0] - 2024-11-19

+
+

Added

+
    +
  • Added MultiResult helper class to build process graphs with multiple result nodes (#391)

  • +
+
+
+

Fixed

+
    +
  • MultiBackendJobManager: Fix issue with duplicate job starting across multiple backends (#654)

  • +
  • MultiBackendJobManager: Fix encoding issue of job metadata in on_job_done (#657)

  • +
  • MultiBackendJobManager: Avoid SettingWithCopyWarning (#641)

  • +
  • Avoid creating empty file if asset download request failed.

  • +
  • MultiBackendJobManager: avoid dtype loading mistakes in CsvJobDatabase on empty columns (#656)

  • +
  • MultiBackendJobManager: restore logging of job status histogram during run_jobs (#655)

  • +
+
+
+
+

[0.34.0] - 2024-10-31

+
+

Removed

+
    +
  • Drop support for Python 3.7 (#578)

  • +
+
+
+

Fixed

+
    +
  • Fixed broken support for title and description job properties in execute_batch() (#652)

  • +
+
+
+
+

[0.33.0] - 2024-10-18

+
+

Added

+
    +
  • Added DataCube.load_stac() to also support creating a load_stac based cube without a connection (#638)

  • +
  • MultiBackendJobManager: Added initialize_from_df(df) (to CsvJobDatabase and ParquetJobDatabase) to initialize (and persist) the job database from a given DataFrame. +Also added create_job_db() factory to easily create a job database from a given dataframe and its type guessed from filename extension. +(#635)

  • +
  • MultiBackendJobManager.run_jobs() now returns a dictionary with counters/stats about various events during the full run of the job manager (#645)

  • +
  • Added (experimental) ProcessBasedJobCreator to be used as start_job callable with MultiBackendJobManager to create multiple jobs from a single parameterized process (e.g. a UDP or remote process definition) (#604)

  • +
+
+
+

Fixed

+
    +
  • When using DataCube.load_collection() without a connection, it is not necessary anymore to also explicitly set fetch_metadata=False (#638)

  • +
+
+
+
+

[0.32.0] - 2024-09-27

+
+

Added

+
    +
  • load_stac/metadata_from_stac: add support for extracting actual temporal dimension metadata (#567)

  • +
  • MultiBackendJobManager: add cancel_running_job_after option to automatically cancel jobs that are running for too long (#590)

  • +
  • Added openeo.api.process.Parameter helper to easily create a “spatial_extent” UDP parameter

  • +
  • Wrap OIDC token request failure in more descriptive OidcException (related to #624)

  • +
  • Added auto_add_save_result option (on by default) to disable automatic addition of save_result node on download/create_job/execute_batch (#513)

  • +
  • Add support for apply_vectorcube UDF signature in run_udf_code (Open-EO/openeo-geopyspark-driver#881)

  • +
  • MultiBackendJobManager: add API to run the update loop in a separate thread, allowing controlled interruption.

  • +
+
+
+

Changed

+
    +
  • MultiBackendJobManager: changed job metadata storage API, to enable working with large databases

  • +
  • DataCube.apply_polygon(): rename polygons argument to geometries, but keep support for legacy polygons for now (#592, #511)

  • +
  • Disallow ambiguous single string argument in DataCube.filter_temporal() (#628)

  • +
  • Automatic adding of save_result from download() or create_job(): +inspect whole process graph for pre-existing save_result nodes +(related to #623, #401, #583)

  • +
  • Disallow ambiguity of combining explicit save_result nodes +and implicit save_result addition from download()/create_job() calls with format +(related to #623, #401, #583)

  • +
+
+
+

Fixed

+
    +
  • apply_dimension with a target_dimension argument was not correctly adjusting datacube metadata on the client side, causing a mismatch.

  • +
  • Preserve non-spatial dimension metadata in aggregate_spatial (#612)

  • +
+
+
+
+

[0.31.0] - 2024-07-26

+
+

Added

+
    +
  • Add experimental openeo.testing.results subpackage with reusable test utilities for comparing batch job results with reference data

  • +
  • MultiBackendJobManager: add initial support for storing job metadata in Parquet file (instead of CSV) (#571)

  • +
  • Add Connection.authenticate_oidc_access_token() to set up authorization headers with an access token that is obtained “out-of-band” (#598)

  • +
  • Add JobDatabaseInterface to allow custom job metadata storage with MultiBackendJobManager (#571)

  • +
+
+
+
+

[0.30.0] - 2024-06-18

+
+

Added

+
    +
  • Add openeo.udf.run_code.extract_udf_dependencies() to extract UDF dependency declarations from UDF code +(related to Open-EO/openeo-geopyspark-driver#237)

  • +
  • Document PEP 723 based Python UDF dependency declarations (Open-EO/openeo-geopyspark-driver#237)

  • +
  • Added more openeo.api.process.Parameter helpers to easily create “bounding_box”, “date”, “datetime”, “geojson” and “temporal_interval” parameters for UDP construction.

  • +
  • Added convenience method Connection.load_stac_from_job(job) to easily load the results of a batch job with the load_stac process (#566)

  • +
  • load_stac/metadata_from_stac: add support for extracting band info from “item_assets” in collection metadata (#573)

  • +
  • Added initial openeo.testing submodule for reusable test utilities

  • +
+
+
+

Fixed

+
    +
  • Initial fix for broken DataCube.reduce_temporal() after load_stac (#568)

  • +
+
+
+
+

[0.29.0] - 2024-05-03

+
+

Added

+
    +
  • Start depending on pystac, initially for better load_stac support (#133, #527)

  • +
+
+
+

Changed

+
    +
  • OIDC device code flow: hide progress bar on completed (or timed out) authentication

  • +
+
+
+
+

[0.28.0] - 2024-03-18

+
+

Added

+
    +
  • Introduced superclass CubeMetadata for CollectionMetadata for essential metadata handling (just dimensions for now) without collection-specific STAC metadata parsing. (#464)

  • +
  • Added VectorCube.vector_to_raster() (#550)

  • +
+
+
+

Changed

+
    +
  • Changed default chunk_size of various download functions from None to 10MB. This improves the handling of large downloads and reduces memory usage. (#528)

  • +
  • Connection.execute() and DataCube.execute() now have an auto_decode argument. If set to True (default), the response will be decoded as JSON and an exception will be thrown if this fails; if set to False, the raw requests.Response object will be returned. (#499)

  • +
+
+
+

Fixed

+
    +
  • Preserve geo-referenced x and y coordinates in execute_local_udf (#549)

  • +
+
+
+
+

[0.27.0] - 2024-01-12

+
+

Added

+
    +
  • Add DataCube.filter_labels()

  • +
+
+
+

Changed

+
    +
  • Update autogenerated functions/methods in openeo.processes to definitions from openeo-processes project version 2.0.0-rc1. +This removes create_raster_cube, fit_class_random_forest, fit_regr_random_forest and save_ml_model. +Although removed from openeo-processes 2.0.0-rc1, support for load_result, predict_random_forest and load_ml_model +is preserved but deprecated. (#424)

  • +
  • Show more informative error message on 403 Forbidden errors from CDSE firewall (#512)

  • +
  • Handle API error responses more strictly and avoid hiding possibly important information in JSON-formatted but non-compliant error responses.

  • +
+
+
+

Fixed

+
    +
  • Fix band name support in DataCube.band() when no metadata is available (#515)

  • +
  • Support optional child callbacks in generated openeo.processes, e.g. merge_cubes (#522)

  • +
  • Fix broken pre-flight validation in Connection.save_user_defined_process (#526)

  • +
+
+
+
+

[0.26.0] - 2023-11-27 - “SRR6” release

+
+

Added

+
    +
  • Support new UDF signature: def apply_datacube(cube: DataArray, context: dict) -> DataArray +(#310)

  • +
  • Add collection_property() helper to easily build collection metadata property filters for Connection.load_collection() +(#331)

  • +
  • Add DataCube.apply_polygon() (standardized version of experimental chunk_polygon) (#424)

  • +
  • Various improvements to band mapping with the Awesome Spectral Indices feature. +Allow explicitly specifying the satellite platform for band name mapping (e.g. “Sentinel2” or “LANDSAT8”) if cube metadata lacks info. +Follow the official band mapping from Awesome Spectral Indices better. +Allow manually specifying the desired band mapping. +(#485, #501)

  • +
  • Also attempt to automatically refresh OIDC access token on a 401 TokenInvalid response (in addition to 403 TokenInvalid) (#508)

  • +
  • Add Parameter.object() factory for object type parameters

  • +
+
+
+

Removed

+
    +
  • Remove custom spectral indices “NDGI”, “NDMI” and “S2WI” from “extra-indices-dict.json” +that were shadowing the official definitions from Awesome Spectral Indices (#501)

  • +
+
+
+

Fixed

+
    +
  • Initial support for “spectral indices” that use constants defined by Awesome Spectral Indices (#501)

  • +
+
+
+
+

[0.25.0] - 2023-11-02

+
+

Changed

+
    +
  • Introduce OpenEoApiPlainError for API error responses that are not well-formed +for better distinction with properly formed API error responses (OpenEoApiError). +(#491).

  • +
+
+
+

Fixed

+
    +
  • Fix missing validate support in LocalConnection.execute (#493)

  • +
+
+
+
+

[0.24.0] - 2023-10-27

+
+

Added

+
    +
  • Add DataCube.reduce_spatial()

  • +
  • Added option (enabled by default) to automatically validate a process graph before execution. +Validation issues just trigger warnings for now. (#404)

  • +
  • Added “Sentinel1” band mapping support to “Awesome Spectral Indices” wrapper (#484)

  • +
  • Run tests in GitHub Actions against Python 3.12 as well

  • +
+
+
+

Changed

+
    +
  • Enforce XarrayDataCube dimension order in execute_local_udf() to (t, bands, y, x) +to improve UDF interoperability with existing back-end implementations.

  • +
+
+
+
+

[0.23.0] - 2023-10-02

+
+

Added

+
    +
  • Support year/month shorthand date notations in temporal extent arguments of Connection.load_collection, DataCube.filter_temporal and related (#421)

  • +
  • Support parameterized bands in load_collection (#471)

  • +
  • Allow specifying item schema in Parameter.array()

  • +
  • Support “subtype” and “format” schema options in Parameter.string()

  • +
+
+
+

Changed

+
    +
  • Before doing user-defined process (UDP) listing/creation: verify that back-end supports that (through openEO capabilities document) to improve error message.

  • +
  • Skip metadata-based normalization/validation and stop showing unhelpful warnings/errors +like “No cube:dimensions metadata” or “Invalid dimension” +when no metadata is available client-side anyway (e.g. when using datacube_from_process, parameterized cube building, …). +(#442)

  • +
+
+
+

Removed

+
    +
  • Bumped minimal supported Python version to 3.7 (#460)

  • +
+
+
+

Fixed

+
    +
  • Support handling of “callback” parameters in openeo.processes callables (#470)

  • +
+
+
+
+

[0.22.0] - 2023-08-09

+
+

Added

+
    +
  • Processes that take a CRS as argument now try harder to normalize your input to +a CRS representation that aligns with the openEO API (using pyproj library when available) +(#259)

  • +
  • Initial load_geojson support with Connection.load_geojson() (#424)

  • +
  • Initial load_url (for vector cubes) support with Connection.load_url() (#424)

  • +
  • Add VectorCube.apply_dimension() (Open-EO/openeo-python-driver#197)

  • +
  • Support lambda based property filtering in Connection.load_stac() (#425)

  • +
  • VectorCube: initial support for filter_bands, filter_bbox, filter_labels and filter_vector (#459)

  • +
+
+
+

Changed

+
    +
  • Connection based requests: always use finite timeouts by default (20 minutes in general, 30 minutes for synchronous execute requests) +(#454)

  • +
+
+
+

Fixed

+
    +
  • Fix: MultiBackendJobManager should stop when finished, also when job finishes with error (#452)

  • +
+
+
+
+

[0.21.1] - 2023-07-19

+
+

Fixed

+
    +
  • Fix spatial_extent/temporal_extent handling in “localprocessing” load_stac (#451)

  • +
+
+
+
+

[0.21.0] - 2023-07-19

+
+

Added

+
    +
  • Add support in VectorCube.download() and VectorCube.execute_batch() to guess output format from extension of a given filename +(#401, #449)

  • +
  • Added load_stac for Client Side Processing, based on the openeo-processes-dask implementation

  • +
+
+
+

Changed

+
    +
  • Updated docs for Client Side Processing with load_stac examples, available at https://open-eo.github.io/openeo-python-client/cookbook/localprocessing.html

  • +
+
+
+

Fixed

+
    +
  • Avoid double save_result nodes when combining VectorCube.save_result() and .download(). +(#401, #448)

  • +
+
+
+
+

[0.20.0] - 2023-06-30

+
+

Added

+
    +
  • Added automatic renewal of access tokens with OIDC client credentials grant (Connection.authenticate_oidc_client_credentials) +(#436)

  • +
+
+
+

Changed

+
    +
  • Simplified BatchJob methods start(), stop(), describe(), … +Legacy aliases start_job(), describe_job(), … are still available and don’t trigger a deprecation warning for now. +(#280)

  • +
  • Update openeo.extra.spectral_indices to Awesome Spectral Indices v0.4.0

  • +
+
+
+
+

[0.19.0] - 2023-06-16

+
+

Added

+
    +
  • Generalized support for setting (default) OIDC provider id through env var OPENEO_AUTH_PROVIDER_ID +#419

  • +
  • Added OidcDeviceCodePollTimeout: specific exception for OIDC device code flow poll timeouts

  • +
  • On-demand preview: Added DataCube.preview() to generate an XYZ service with the process graph and display a map widget

  • +
+
+
+

Fixed

+
    +
  • Fix format option conflict between save_result and create_job +#433

  • +
  • Ensure that OIDC device code link opens in a new tab/window #443

  • +
+
+
+
+

[0.18.0] - 2023-05-31

+
+

Added

+
    +
  • Support OIDC client credentials grant from a generic connection.authenticate_oidc() call +through environment variables +#419

  • +
+
+
+

Fixed

+
    +
  • Fixed UDP parameter conversion issue in build_process_dict when using parameter in context of run_udf +#431

  • +
+
+
+
+

[0.17.0] and [0.17.1] - 2023-05-16

+
+

Added

+
    +
  • Connection.authenticate_oidc(): add argument max_poll_time to set maximum device code flow poll time

  • +
  • Show progress bar while waiting for OIDC authentication with device code flow, +including a special mode for use in Jupyter notebooks. +(#237)

  • +
  • Basic support for load_stac process with Connection.load_stac() +(#425)

  • +
  • Add DataCube.aggregate_spatial_window()

  • +
+
+
+

Fixed

+
    +
  • Include “scope” parameter in OIDC token request with client credentials grant.

  • +
  • Support fractional seconds in Rfc3339.parse_datetime +(#418)

  • +
+
+
+
+

[0.16.0] - 2023-04-17 - “SRR5” release

+
+

Added

+
    +
  • Full support for user-uploaded files (/files endpoints) +(#377)

  • +
  • Initial, experimental “local processing” feature to use +openEO Python Client Library functionality on local +GeoTIFF/NetCDF files and also do the processing locally +using the openeo_processes_dask package +(#338)

  • +
  • Add BatchJob.get_results_metadata_url().

  • +
+
+
+

Changed

+
    +
  • Connection.list_files() returns a list of UserFile objects instead of a list of metadata dictionaries. +Use UserFile.metadata to get the original dictionary. +(#377)

  • +
  • DataCube.aggregate_spatial() returns a VectorCube now, instead of a DataCube +(#386). +The (experimental) fit_class_random_forest() and fit_regr_random_forest() methods +moved accordingly to the VectorCube class.

  • +
  • Improved documentation on openeo.processes and ProcessBuilder +(#390).

  • +
  • DataCube.create_job() and Connection.create_job() now require +keyword arguments for all but the first argument for clarity. +(#412).

  • +
  • Pass minimum log level to backend when retrieving batch job and secondary service logs. +(Open-EO/openeo-api#485, +Open-EO/openeo-python-driver#170)

  • +
+
+
+

Removed

+
    +
  • Dropped support for pre-1.0.0 versions of the openEO API +(#134):

    +
      +
    • Remove ImageCollectionClient and related helpers +(now unused leftovers from version 0.4.0 and earlier). +(Also #100)

    • +
    • Drop support for pre-1.0.0 job result metadata

    • +
    • Require at least version 1.0.0 of the openEO API for a back-end in Connection +and all its methods.

    • +
    +
  • +
+
+
+

Fixed

+
    +
  • Reinstated old behavior of authentication related user files (e.g. refresh token store) on Windows: when PrivateJsonFile may be readable by others, just log a message instead of raising PermissionError (#387)

  • +
  • VectorCube.create_job() and MlModel.create_job() are properly aligned with DataCube.create_job() +regarding setting job title, description, etc. +(#412).

  • +
  • More robust handling of billing currency/plans in capabilities +(#414)

  • +
  • Avoid blindly adding a save_result node from DataCube.execute_batch() when there is already one +(#401)

  • +
+
+
+
+

[0.15.0] - 2023-03-03

+
+

Added

+
    +
  • The openeo Python client library can now also be installed with conda (conda-forge channel) +(#176)

  • +
  • Allow using a custom requests.Session in openeo.rest.auth.oidc logic

  • +
+
+
+

Changed

+
    +
  • Less verbose log printing on failed batch job #332

  • +
  • Improve (UTC) timezone handling in openeo.util.Rfc3339 and add rfc3339.today()/rfc3339.utcnow().

  • +
+
+
+
+

[0.14.1] - 2023-02-06

+
+

Fixed

+
    +
  • Fine-tuned XarrayDataCube tests for conda building and packaging (#176)

  • +
+
+
+
+

[0.14.0] - 2023-02-01

+
+

Added

+
    +
  • Jupyter integration: show process graph visualization of DataCube objects instead of generic repr. (#336)

  • +
  • Add Connection.vectorcube_from_paths() to load a vector cube +from files (on back-end) or URLs with load_uploaded_files process.

  • +
  • Python 3.10 and 3.11 are now officially supported +(test run now also for 3.10 and 3.11 in GitHub Actions, #346)

  • +
  • Support for simplified OIDC device code flow (#335)

  • +
  • Added MultiBackendJobManager, based on implementation from openeo-classification project +(#361)

  • +
  • Added resilience to MultiBackendJobManager for backend failures (#365)

  • +
+
+
+

Changed

+
    +
  • execute_batch also skips temporary 502 Bad Gateway errors. #352

  • +
+
+
+

Fixed

+
    +
  • Fixed/improved math operator/process support for DataCubes in “apply” mode (non-“band math”), +allowing expressions like 10 * cube.log10() and ~(cube == 0) +(#123)

  • +
  • Support PrivateJsonFile permissions properly on Windows, using oschmod library. +(#198)

  • +
  • Fixed some broken unit tests on Windows related to path (separator) handling. +(#350)

  • +
+
+
+
+

[0.13.0] - 2022-10-10 - “UDF UX” release

+
+

Added

+
    +
  • Add max_cloud_cover argument to load_collection() to simplify setting maximum cloud cover (property eo:cloud_cover) (#328)

  • +
+
+
+

Changed

+
    +
  • Improve default dimension metadata of a datacube created with openeo.rest.datacube.DataCube.load_disk_collection

  • +
  • DataCube.download(): only automatically add save_result node when there is none yet.

  • +
  • Deprecation warnings: make sure they are shown by default and can be hidden when necessary.

  • +
  • Rework and improve openeo.UDF helper class for UDF usage +(#312).

    +
      +
    • allow loading directly from local file or URL

    • +
    • autodetect runtime from file/URL suffix or source code

    • +
    • hide implementation details around data argument (e.g. data={"from_parameter": "x"})

    • +
    • old usage patterns of openeo.UDF and DataCube.apply_dimension() still work but trigger deprecation warnings

    • +
    +
  • +
  • Show warning when using load_collection property filters that are not defined in the collection metadata (summaries).

  • +
+
+
+
+

[0.12.1] - 2022-09-15

+
+

Changed

+
    +
  • Eliminate dependency on distutils.version.LooseVersion which started to trigger deprecation warnings (#316).

  • +
+
+
+

Removed

+
    +
  • Remove old Connection.oidc_auth_user_id_token_as_bearer workaround flag (#300)

  • +
+
+
+

Fixed

+
    +
  • Fix refresh token handling in case of OIDC token request with refresh token grant (#326)

  • +
+
+
+
+

[0.12.0] - 2022-09-09

+
+

Added

+
    +
  • Allow passing raw JSON string, JSON file path or URL to Connection.download(), +Connection.execute() and Connection.create_job()

  • +
  • Add support for reverse math operators on DataCube in apply mode (#323)

  • +
  • Add DataCube.print_json() to simplify exporting process graphs in Jupyter or other interactive environments (#324)

  • +
  • Raise DimensionAlreadyExistsException when trying to add_dimension() a dimension with existing name (Open-EO/openeo-geopyspark-driver#205)

  • +
+
+
+

Changed

+
    +
  • DataCube.execute_batch() now also guesses the output format from the filename, +and allows using format argument next to the current out_format +to align with the DataCube.download() method. (#240)

  • +
  • Better client-side handling of merged band name metadata in DataCube.merge_cubes()

  • +
+
+
+

Removed

+
    +
  • Remove legacy DataCube.graph and DataCube.flatten() to prevent usage patterns that cause interoperability issues +(#155, #209, #324)

  • +
+
+
+
+

[0.11.0] - 2022-07-02

+
+

Added

+
    +
  • Add support for passing a PGNode/VectorCube as geometry to aggregate_spatial, mask_polygon, …

  • +
  • Add support for second order callbacks e.g. is_valid in count in reduce_dimension (#317)

  • +
+
+
+

Changed

+
    +
  • Rename RESTJob class name to less cryptic and more user-friendly BatchJob. +Original RESTJob is still available as deprecated alias. +(#280)

  • +
  • Dropped default reducer (“max”) from DataCube.reduce_temporal_simple()

  • +
  • Various documentation improvements:

    +
      +
    • general styling, landing page and structure tweaks (#285)

    • +
    • batch job docs (#286)

    • +
    • getting started docs (#308)

    • +
    • part of UDF docs (#309)

    • +
    • added process-to-method mapping docs

    • +
    +
  • +
  • Drop hardcoded h5netcdf engine from XarrayIO.from_netcdf_file() +and XarrayIO.to_netcdf_file() (#314)

  • +
  • Changed argument name of Connection.describe_collection() from name to collection_id +to be more in line with other methods/functions.

  • +
+
+
+

Fixed

+
    +
  • Fix context/condition confusion bug with count callback in DataCube.reduce_dimension() (#317)

  • +
+
+
+
+

[0.10.1] - 2022-05-18 - “LPS22” release

+
+

Added

+
    +
  • Add context parameter to DataCube.aggregate_spatial(), DataCube.apply_dimension(), +DataCube.apply_neighborhood(), DataCube.apply(), DataCube.merge_cubes(). +(#291)

  • +
  • Add DataCube.fit_regr_random_forest() (#293)

  • +
  • Add PGNode.update_arguments(), which combined with DataCube.result_node() allows doing advanced process graph argument tweaking/updating without using ._pg hacks.

  • +
  • JobResults.download_files(): also download (by default) the job result metadata as STAC JSON file (#184)

  • +
  • OIDC handling in Connection: try to automatically refresh access token when expired (#298)

  • +
  • Connection.create_job raises exception if response does not contain a valid job_id

  • +
  • Add openeo.udf.debug.inspect for using the openEO inspect process in a UDF (#302)

  • +
  • Add openeo.util.to_bbox_dict() to simplify building an openEO style bbox dictionary, e.g. from a list or shapely geometry (#304)

  • +
+
+
+

Removed

+
    +
  • Removed deprecated (and non-functional) zonal_statistics method from old ImageCollectionClient API. (#144)

  • +
+
+
+
+

[0.10.0] - 2022-04-08 - “SRR3” release

+
+

Added

+
    +
  • Add support for comparison operators (<, >, <= and >=) in callback process building

  • +
  • Added Connection.describe_process() to retrieve and show a single process

  • +
  • Added DataCube.flatten_dimensions() and DataCube.unflatten_dimension +(Open-EO/openeo-processes#308, Open-EO/openeo-processes#316)

  • +
  • Added VectorCube.run_udf (to avoid non-standard process_with_node(UDF(...)) usage)

  • +
  • Added DataCube.fit_class_random_forest() and Connection.load_ml_model() to train and load Machine Learning models +(#279)

  • +
  • Added DataCube.predict_random_forest() to easily use reduce_dimension with a predict_random_forest reducer +using a MlModel (trained with fit_class_random_forest) +(#279)

  • +
  • Added DataCube.resample_cube_temporal (#284)

  • +
  • Add target_dimension argument to DataCube.aggregate_spatial (#288)

  • +
  • Add basic configuration file system to define a default back-end URL and enable auto-authentication (#264, #187)

  • +
  • Add context argument to DataCube.chunk_polygon()

  • +
  • Add Connection.version_info() to list version information about the client, the API and the back-end

  • +
+
+
+

Changed

+
    +
  • Include openEO API error id automatically in exception message to simplify user support and post-mortem analysis.

  • +
  • Use Connection.default_timeout (when set) also on version discovery request

  • +
  • Drop ImageCollection from DataCube’s class hierarchy. +This practically removes very old (pre-0.4.0) methods like date_range_filter and bbox_filter from DataCube. +(#100, #278)

  • +
  • Deprecate DataCube.send_job in favor of DataCube.create_job for better consistency (internally and with other libraries) (#276)

  • +
  • Update (autogenerated) openeo.processes module to 1.2.0 release (2021-12-13) of openeo-processes

  • +
  • Update (autogenerated) openeo.processes module to draft version of 2022-03-16 (e4df8648) of openeo-processes

  • +
  • Update openeo.extra.spectral_indices to a post-0.0.6 version of Awesome Spectral Indices

  • +
+
+
+

Removed

+
    +
  • Removed deprecated zonal_statistics method from DataCube. (#144)

  • +
  • Deprecate old-style DataCube.polygonal_mean_timeseries(), DataCube.polygonal_histogram_timeseries(), +DataCube.polygonal_median_timeseries() and DataCube.polygonal_standarddeviation_timeseries()

  • +
+
+
+

Fixed

+
    +
  • Support rename_labels on temporal dimension (#274)

  • +
  • Basic support for mixing DataCube and ProcessBuilder objects/processing (#275)

  • +
+
+
+
+

[0.9.2] - 2022-01-14

+
+

Added

+
    +
  • Add experimental support for chunk_polygon process (Open-EO/openeo-processes#287)

  • +
  • Add support for spatial_extent, temporal_extent and bands to Connection.load_result()

  • +
  • Setting the environment variable OPENEO_BASEMAP_URL allows to set a new templated URL to a XYZ basemap for the Vue Components library, OPENEO_BASEMAP_ATTRIBUTION allows to set the attribution for the basemap (#260)

  • +
  • Initial support for experimental “federation:missing” flag on partial openEO Platform user job listings (Open-EO/openeo-api#419)

  • +
  • Best effort detection of mistakenly using Python builtin sum or all functions in callbacks (Forum #113)

  • +
  • Automatically print batch job logs when job doesn’t finish successfully (using execute_batch/run_synchronous/start_and_wait).

  • +
+
+
+
+

[0.9.1] - 2021-11-16

+
+

Added

+
    +
  • Add options argument to DataCube.atmospheric_correction (Open-EO/openeo-python-driver#91)

  • +
  • Add atmospheric_correction_options and cloud_detection_options arguments to DataCube.ard_surface_reflectance (Open-EO/openeo-python-driver#91)

  • +
  • UDP storing: add support for “returns”, “categories”, “examples” and “links” properties (#242)

  • +
  • Add openeo.extra.spectral_indices: experimental API to easily compute spectral indices (vegetation, water, urban, …) +on a DataCube, using the index definitions from Awesome Spectral Indices

  • +
+
+
+

Changed

+
    +
  • Batch job status poll loop: ignore (temporary) “service unavailable” errors (Open-EO/openeo-python-driver#96)

  • +
  • Batch job status poll loop: fail when there are too many soft errors (temporary connection/availability issues)

  • +
+
+
+

Fixed

+
    +
  • Fix DataCube.ard_surface_reflectance() to use process ard_surface_reflectance instead of atmospheric_correction

  • +
+
+
+
+

[0.9.0] - 2021-10-11

+
+

Added

+
    +
  • Add command line tool openeo-auth token-clear to remove OIDC refresh token cache

  • +
  • Add support for OIDC device authorization grant without PKCE nor client secret, +(#225, openeo-api#410)

  • +
  • Add DataCube.dimension_labels() (EP-4008)

  • +
  • Add Connection.load_result() (EP-4008)

  • +
  • Add proper support for child callbacks in fit_curve and predict_curve (#229)

  • +
  • ProcessBuilder: Add support for array_element(data, n) through data[n] syntax (#228)

  • +
  • ProcessBuilder: Add support for eq and neq through == and != operators (EP-4011)

  • +
  • Add DataCube.validate() for process graph validation (EP-4012 related)

  • +
  • Add Connection.as_curl() for generating curl command to evaluate a process graph or DataCube from the command line

  • +
  • Add support in DataCube.download() to guess output format from extension of a given filename

  • +
+
+
+

Changed

+
    +
  • Improve default handling of crs (and base/height) in filter_bbox: avoid explicitly sending null unnecessarily +(#233).

  • +
  • Update documentation/examples/tests: EPSG CRS in filter_bbox should be integer code, not string +(#233).

  • +
  • Raise ProcessGraphVisitException from ProcessGraphVisitor.resolve_from_node() (instead of generic ValueError)

  • +
  • DataCube.linear_scale_range is now a shortcut for DataCube.apply(lambda x: x.linear_scale_range(input_min, input_max, output_min, output_max)), +instead of creating an invalid process graph that tries to invoke linear_scale_range on a datacube directly.

  • +
  • Nicer error message when back-end does not support basic auth (#247)

  • +
+
+
+

Removed

+
    +
  • Remove unused and outdated (0.4-style) File/RESTFile classes (#115)

  • +
  • Deprecate usage of DataCube.graph property (#209)

  • +
+
+
+
+

[0.8.2] - 2021-08-24

+

Minor release to address version packaging issue.

+
+
+

[0.8.1] - 2021-08-24

+
+

Added

+
    +
  • Support nested callbacks inside array arguments, for instance in array_modify, array_create

  • +
  • Support array_concat

  • +
  • add ProcessGraphUnflattener and PGNodeGraphUnflattener to unflatten a flat dict representation of a process +graph to a PGNode graph (EP-3609)

  • +
  • Add Connection.datacube_from_flat_graph and Connection.datacube_from_json to construct a DataCube +from flat process graph representation (e.g. JSON file or JSON URL) (EP-3609)

  • +
  • Add documentation about UDP unflattening and sharing (EP-3609)

  • +
  • Add fit_curve and predict_curve, two methods used in change detection

  • +
+
+
+

Changed

+
    +
  • Update processes.py based on 1.1.0 release of openeo-processes project

  • +
  • processes.py: include all processes from “proposals” folder of openeo-processes project

  • +
  • Jupyter integration: Visual rendering for process graphs shown instead of a plain JSON representation.

  • +
  • Migrate from Travis CI to GitHub Actions for documentation building and unit tests (#178, EP-3645)

  • +
+
+
+

Removed

+
    +
  • Removed unit test runs for Python 3.5 (#210)

  • +
+
+
+
+

[0.8.0] - 2021-06-25

+
+

Added

+
    +
  • Allow, but raise warning when specifying a CRS for the geometry passed to aggregate_spatial and mask_polygon, +which is non-standard/experimental feature, only supported by specific back-ends +(#204)

  • +
  • Add optional argument to Parameter and fix re-encoding parameters with default value. (EP-3846)

  • +
  • Add support to test strict equality with ComparableVersion

  • +
  • Jupyter integration: add rich HTML rendering for more backend metadata (Job, Job Estimate, Logs, Services, User-Defined Processes)

  • +
  • Add support for filter_spatial

  • +
  • Add support for aggregate_temporal_period

  • +
  • Added class Service for secondary web-services

  • +
  • Added a method service to Connection

  • +
  • Add Rfc3339.parse_date and Rfc3339.parse_date_or_datetime

  • +
+
+
+

Changed

+
    +
  • Disallow redirects on POST/DELETE/… requests and require status code 200 on POST /result requests. +This improves error information where POST /result would involve a redirect. (EP-3889)

  • +
  • Class JobLogEntry got replaced with a more complete and re-usable LogEntry dict

  • +
  • The following methods return a Service class instead of a dict: tiled_viewing_service in ImageCollection, ImageCollectionClient and DataCube, create_service in Connection

  • +
+
+
+

Deprecated

+
    +
  • The method remove_service in Connection has been deprecated in favor of delete_service in the Service class

  • +
+
+
+
+

[0.7.0] - 2021-04-21

+
+

Added

+ +
+
+

Changed

+
    +
  • Eliminate development/optional dependency on openeo_udf project +(#159, #190, EP-3578). +Now the openEO client library itself contains the necessary classes and implementation to run UDF code locally.

  • +
+
+
+

Fixed

+
    +
  • Connection: don’t send default auth headers to non-backend domains (#201)

  • +
+
+
+
+

[0.6.1] - 2021-03-29

+
+

Changed

+
    +
  • Improve OpenID Connect usability on Windows: don’t raise exception on file permissions +that can not be changed (by os.chmod on Windows) (#198)

  • +
+
+
+
+

[0.6.0] - 2021-03-26

+
+

Added

+
    +
  • Add initial/experimental support for OIDC device code flow with PKCE (alternative for client secret) (#191 / EP-3700)

  • +
  • When creating a connection: use “https://” by default when no protocol is specified

  • +
  • DataCube.mask_polygon: support Parameter argument for mask

  • +
  • Add initial/experimental support for default OIDC client (#192, Open-EO/openeo-api#366)

  • +
  • Add Connection.authenticate_oidc for user-friendlier OIDC authentication: first try refresh token and fall back on device code flow

  • +
  • Add experimental support for array_modify process (Open-EO/openeo-processes#202)

  • +
+
+
+

Removed

+
    +
  • Remove old/deprecated Connection.authenticate_OIDC()

  • +
+
+
+
+

[0.5.0] - 2021-03-17

+
+

Added

+
    +
  • Add namespace support to DataCube.process, PGNode, ProcessGraphVisitor (minor API breaking change) and related. +Allows building process graphs with processes from non-“backend” namespaces +(#182)

  • +
  • collection_items to request collection items through a STAC API

  • +
  • paginate as a basic method to support link-based pagination

  • +
  • Add namespace support to Connection.datacube_from_process

  • +
  • Add basic support for band name aliases in metadata.Band for band index lookup (EP-3670)

  • +
+
+
+

Changed

+
    +
  • OpenEoApiError moved from openeo.rest.connection to openeo.rest

  • +
  • Added HTML representation for list_jobs, list_services, list_files and for job results

  • +
  • Improve refresh token handling in OIDC logic: avoid requesting refresh token +(which can fail if OIDC client is not set up for that) when not necessary (EP-3700)

  • +
  • RESTJob.start_and_wait: add status line when sending “start” request, and drop microsecond resolution from status lines

  • +
+
+
+

Fixed

+
    +
  • Updated Vue Components library (solves issue with loading from slower back-ends where no result was shown)

  • +
+
+
+
+

[0.4.10] - 2021-02-26

+
+

Added

+
    +
  • Add “reflected” operator support to ProcessBuilder

  • +
  • Add RESTJob.get_results(), JobResults and ResultAsset for more fine-grained batch job result handling. (EP-3739)

  • +
  • Add documentation on batch job result (asset) handling and downloading

  • +
+
+
+

Changed

+
    +
  • Mark Connection.imagecollection more clearly as deprecated/legacy alias of Connection.load_collection

  • +
  • Deprecated job_results() and job_logs() on Connection object, it’s better to work through RESTJob object.

  • +
  • Update DataCube.sar_backscatter to the latest process spec: add coefficient argument +and remove orthorectify, rtc. (openeo-processes#210)

  • +
+
+
+

Removed

+
    +
  • Remove outdated batch job result download logic left-overs

  • +
  • Remove (outdated) abstract base class openeo.job.Job: did not add value, only caused maintenance overhead. (#115)

  • +
+
+
+
+

[0.4.9] - 2021-01-29

+
+

Added

+
    +
  • Make DataCube.filter_bbox() easier to use: allow passing a bbox tuple, list, dict or even shapely geometry directly as first positional argument or as bbox keyword argument. +Handling of the legacy non-standard west-east-north-south positional argument order is preserved for now (#136)

  • +
  • Add “band math” methods DataCube.ln(), DataCube.logarithm(base), DataCube.log10() and DataCube.log2()

  • +
  • Improved support for creating and handling parameters when defining user-defined processes (EP-3698)

  • +
  • Initial Jupyter integration: add rich HTML rendering of backend metadata (collections, file formats, UDF runtimes, …) +(#170)

  • +
  • add resolution_merge process (experimental) (EP-3687, openeo-processes#221)

  • +
  • add sar_backscatter process (experimental) (EP-3612, openeo-processes#210)

  • +
+
+
+

Fixed

+
    +
  • Fixed ‘Content-Encoding’ handling in Connection.download: client did not automatically decompress /result +responses when necessary (#175)

  • +
+
+
+
+

[0.4.8] - 2020-11-17

+
+

Added

+
    +
  • Add DataCube.aggregate_spatial()

  • +
+
+
+

Changed

+
    +
  • Get/create default RefreshTokenStore lazily in Connection

  • +
  • Various documentation tweaks

  • +
+
+
+
+

[0.4.7] - 2020-10-22

+
+

Added

+
    +
  • Add support for title/description/plan/budget in DataCube.send_job (#157 / #158)

  • +
  • Add DataCube.to_json() to easily get JSON representation of a DataCube

  • +
  • Allow to subclass CollectionMetadata and preserve original type when “cloning”

  • +
+
+
+

Changed

+
    +
  • Changed execute_batch to support downloading multiple files (within EP-3359, support profiling)

  • +
  • Don’t send None-valued title/description/plan/budget fields from DataCube.send_job (#157 / #158)

  • +
+
+
+

Removed

+
    +
  • Remove duplicate and broken Connection.list_processgraphs

  • +
+
+
+

Fixed

+
    +
  • Various documentation fixes and tweaks

  • +
  • Avoid merge_cubes warning when using non-band-math DataCube operators

  • +
+
+
+
+

[0.4.6] - 2020-10-15

+
+

Added

+
    +
  • Add DataCube.aggregate_temporal

  • +
  • Add initial support to download profiling information

  • +
+
+
+

Changed

+
    +
  • Deprecated legacy functions/methods are better documented as such and link to a recommended alternative (EP-3617).

  • +
  • Get/create default AuthConfig in Connection lazily (allows client to run in environments without existing (default) config folder)

  • +
+
+
+

Deprecated

+
    +
  • Deprecate zonal_statistics in favor of aggregate_spatial

  • +
+
+
+

Removed

+
    +
  • Remove support for old, non-standard stretch_colors process (Use linear_scale_range instead).

  • +
+
+
+
+

[0.4.5] - 2020-10-01

+
+

Added

+
    +
  • Also handle dict arguments in dereference_from_node_arguments (EP-3509)

  • +
  • Add support for less/greater than and equal operators

  • +
  • Raise warning when user defines a UDP with same id as a pre-defined one (EP-3544, #147)

  • +
  • Add rename_labels support in metadata (EP-3585)

  • +
  • Improve “callback” handling (sub-process graphs): add predefined callbacks for all official processes and functionality to assemble these (EP-3555, #153)

  • +
  • Moved datacube write/save/plot utilities from udf to client (EP-3456)

  • +
  • Add documentation on OpenID Connect authentication (EP-3485)

  • +
+
+
+

Fixed

+
    +
  • Fix kwargs handling in TimingLogger decorator

  • +
+
+
+
+

[0.4.4] - 2020-08-20

+
+

Added

+
    +
  • Add openeo-auth command line tool to manage OpenID Connect (and basic auth) related configs (EP-3377/EP-3493)

  • +
  • Support for using config files for OpenID Connect and basic auth based authentication, instead of hardcoding credentials (EP-3377/EP-3493)

  • +
+
+
+

Fixed

+
    +
  • Fix target_band handling in DataCube.ndvi (EP-3496)

  • +
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/configuration.html b/configuration.html new file mode 100644 index 000000000..f26ec99f1 --- /dev/null +++ b/configuration.html @@ -0,0 +1,239 @@ + + + + + + + + Configuration — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Configuration

+
+

Warning

+

Configuration files are an experimental feature +and some details are subject to change.

+
+
+

Added in version 0.10.0.

+
+
+

Configuration files

+

Some functionality of the openEO Python client library can be customized +through configuration files.

+
+

Note

+

Note that these configuration files are different from the authentication secret/cache files +discussed at Auth config files and openeo-auth helper tool. +The latter are focussed on storing authentication secrets +and are mostly managed automatically. +The normal configuration files however should not contain secrets, +are usually edited manually, can be placed at various locations +and it is not uncommon to store them in version control where that makes sense.

+
+
+

Format

+

At the moment, only INI-style configs are supported. +This is a simple configuration format, easy to maintain +and it is supported out of the box in Python (without additional libraries).

+

Example (note the use of sections and support for comments):

+
[General]
+# Print loaded configuration file and default back-end URLs in interactive mode
+verbose = auto
+
+[Connection]
+default_backend = openeo.cloud
+
+
+
+
+

Location

+

The following configuration locations are probed (in this order) for an existing configuration file. The first successful hit will be loaded:

+
    +
  • the path in environment variable OPENEO_CLIENT_CONFIG if it is set (filename must end with extension .ini)

  • +
  • the file openeo-client-config.ini in the current working directory

  • +
  • the file ${OPENEO_CONFIG_HOME}/openeo-client-config.ini if the environment variable OPENEO_CONFIG_HOME is set

  • +
  • the file ${XDG_CONFIG_HOME}/openeo-python-client/openeo-client-config.ini if environment variable XDG_CONFIG_HOME is set

  • +
  • the file .openeo-client-config.ini in the home folder of the user

  • +
+
+
+

Configuration options

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + +

Config Section

Config

Description and possible values

General

verbose

+
Verbosity mode when important config values are used:
    +
  • print: always print() info

  • +
  • auto (default): only print() when in an interactive context

  • +
  • off: don’t print info

  • +
+
+
+

Connection

default_backend

Default back-end to connect to when openeo.connect() +is used without explicit back-end URL. +Also see Default openEO back-end URL and auto-authentication

Connection

default_backend.auto_authenticate

+
Automatically authenticate in openeo.connect() when using the default_backend config. Allowed values:
    +
  • basic for basic authentication

  • +
  • oidc for OpenID Connect authentication

  • +
  • off (default) for no authentication

  • +
+
+
+

Also see Default openEO back-end URL and auto-authentication

+

Connection

auto_authenticate

Automatically authenticate in openeo.connect(). +Allowed values: see default_backend.auto_authenticate. +Also see Default openEO back-end URL and auto-authentication

+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/cookbook/ard.html b/cookbook/ard.html new file mode 100644 index 000000000..e84a88c80 --- /dev/null +++ b/cookbook/ard.html @@ -0,0 +1,234 @@ + + + + + + + + Analysis Ready Data generation — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Analysis Ready Data generation

+

For certain use cases, the preprocessed data collections available in the openEO back-ends are not sufficient or simply not +available. For that case, openEO supports a few very common preprocessing scenarios:

+
    +
  • Atmospheric correction of optical data

  • +
  • SAR backscatter computation

  • +
+

These processes also offer a number of parameters to customize the processing. There are also variants with a default +parametrization that results in data that is compliant with CEOS CARD4L specifications https://ceos.org/ard/.

+

We should note that these operations can be computationally expensive, so certainly affect overall processing time and +cost of your final algorithm. Hence, make sure to make an informed decision when you decide to use these methods.

+
+

Atmospheric correction

+

The atmospheric correction process can apply a chosen +method on raw ‘L1C’ data. The supported methods and input datasets depend on the back-end, because not every method is +validated or works on any dataset, and different back-ends try to offer a variety of options. This gives you as a user +more options to run and compare different methods, and select the most suitable one for your case.

+

To perform an atmospheric correction, the user has to +load an uncorrected L1C optical dataset. On the resulting datacube, the atmospheric_correction() +method can be invoked. Note that it may not be possible to apply certain processes to the raw input data: preprocessing +algorithms can be tightly coupled with the raw data, making it hard or impossible for the back-end to perform operations +in between loading and correcting the data.

+

The CARD4L variant of this process is: ard_surface_reflectance(). This process follows +CEOS specifications, and thus can include additional processing steps, like a BRDF correction, that are not yet available as a +separate process.

+
+

Reference implementations

+

This section shows a few working examples for these processes.

+
+

EODC back-end

+

EODC (https://openeo.eodc.eu/v1.0) supports ard_surface_reflectance, based on the FORCE toolbox. (https://github.com/davidfrantz/force)

+
+
+

Geotrellis back-end

+

The geotrellis back-end (https://openeo.vito.be) supports atmospheric_correction() with iCor and SMAC as methods. +The version of iCor only offers basic atmospheric correction features, without special options for water products: https://remotesensing.vito.be/case/icor +SMAC is implemented based on: https://github.com/olivierhagolle/SMAC +Both methods have been tested with Sentinel-2 as input. The viewing and sun angles need to be selected by the user to make them +available for the algorithm.

+

This is an example of applying iCor:

+
l1c = connection.load_collection("SENTINEL2_L1C_SENTINELHUB",
+        spatial_extent={'west':3.758216409030558,'east':4.087806252,'south':51.291835566,'north':51.3927399},
+        temporal_extent=["2017-03-07","2017-03-07"],bands=['B04','B03','B02','B09','B8A','B11','sunAzimuthAngles','sunZenithAngles','viewAzimuthMean','viewZenithMean'] )
+l1c.atmospheric_correction(method="iCor").download("rgb-icor.geotiff",format="GTiff")
+
+
+
+
+
+
+

SAR backscatter

+

Data from synthetic aperture radar sensors requires significant preprocessing to be calibrated and normalized for terrain. +This is referred to as backscatter computation, and is supported by +sar_backscatter and the CARD4L compliant variant +ard_normalized_radar_backscatter.

+

The user should load a datacube containing raw SAR data, such as Sentinel-1 GRD. On the resulting datacube, the +sar_backscatter() method can be invoked. The CEOS CARD4L variant is: +ard_normalized_radar_backscatter(). These processes are tightly coupled to +metadata from specific sensors, so it is not possible to apply other processes to the datacube first, +with the exception of specifying filters in space and time.

+
+

Reference implementations

+

This section shows a few working examples for these processes.

+
+

EODC back-end

+

EODC (https://openeo.eodc.eu/v1.0) supports sar_backscatter, based on the Sentinel-1 toolbox. (https://sentinel.esa.int/web/sentinel/toolboxes/sentinel-1)

+
+
+

Geotrellis back-end

+

When working with the Sentinelhub SENTINEL1_GRD collection, both sar processes can be used. The underlying implementation is +provided by Sentinelhub, (https://docs.sentinel-hub.com/api/latest/data/sentinel-1-grd/#processing-options), and offers full +CARD4L compliant processing options.

+

This is an example of ard_normalized_radar_backscatter():

+
s1grd = (connection.load_collection('SENTINEL1_GRD', bands=['VH', 'VV'])
+ .filter_bbox(west=2.59003, east=2.8949, north=51.2206, south=51.069)
+ .filter_temporal(extent=["2019-10-10","2019-10-10"]))
+
+job = s1grd.ard_normalized_radar_backscatter().execute_batch()
+
+for asset in job.get_results().get_assets():
+    asset.download()
+
+
+

When working with other GRD data, an implementation based on Orfeo Toolbox is used:

+ +

The Orfeo implementation currently only supports sigma0 computation, and is not CARD4L compliant.

+
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/cookbook/index.html b/cookbook/index.html new file mode 100644 index 000000000..19f1daea0 --- /dev/null +++ b/cookbook/index.html @@ -0,0 +1,219 @@ + + + + + + + + openEO CookBook — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+ + +
+
+ + + + + + + \ No newline at end of file diff --git a/cookbook/job_manager.html b/cookbook/job_manager.html new file mode 100644 index 000000000..f5829fadb --- /dev/null +++ b/cookbook/job_manager.html @@ -0,0 +1,755 @@ + + + + + + + + Multi Backend Job Manager — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Multi Backend Job Manager

+
+

API

+
+

Warning

+

This is a new experimental API, subject to change.

+
+
+
+class openeo.extra.job_management.MultiBackendJobManager(poll_sleep=60, root_dir='.', *, cancel_running_job_after=None)[source]
+

Tracker for multiple jobs on multiple backends.

+

Usage example:

+
import logging
+import pandas as pd
+import openeo
+from openeo.extra.job_management import MultiBackendJobManager
+
+logging.basicConfig(
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    level=logging.INFO
+)
+
+manager = MultiBackendJobManager()
+manager.add_backend("foo", connection=openeo.connect("http://foo.test"))
+manager.add_backend("bar", connection=openeo.connect("http://bar.test"))
+
+jobs_df = pd.DataFrame(...)
+output_file = "jobs.csv"
+
+def start_job(
+    row: pd.Series,
+    connection: openeo.Connection,
+    **kwargs
+) -> openeo.BatchJob:
+    year = row["year"]
+    cube = connection.load_collection(
+        ...,
+        temporal_extent=[f"{year}-01-01", f"{year+1}-01-01"],
+    )
+    ...
+    return cube.create_job(...)
+
+manager.run_jobs(df=jobs_df, start_job=start_job, output_file=output_file)
+
+
+

See run_jobs() for more information on the start_job callable.

+
+
Parameters:
+
    +
  • poll_sleep (int) – How many seconds to sleep between polls.

  • +
  • root_dir (Union[str, Path, None]) –

    Root directory to save files for the jobs, e.g. metadata and error logs. +This defaults to “.” the current directory.

    +

    Each job gets its own subfolder in this root directory. +You can use the following methods to find the relevant paths, +based on the job ID:

    +
    +
      +
    • get_job_dir

    • +
    • get_error_log_path

    • +
    • get_job_metadata_path

    • +
    +
    +

  • +
  • cancel_running_job_after (Optional[int]) – Optional temporal limit (in seconds) after which running jobs should be canceled +by the job manager.

  • +
+
+
+
+

Added in version 0.14.0.

+
+
+

Changed in version 0.32.0: Added cancel_running_job_after parameter.

+
+
+
+add_backend(name, connection, parallel_jobs=2)[source]
+

Register a backend with a name and a Connection getter.

+
+
Parameters:
+
    +
  • name (str) – Name of the backend.

  • +
  • connection (Union[Connection, Callable[[], Connection]]) – Either a Connection to the backend, or a callable to create a backend connection.

  • +
  • parallel_jobs (int) – Maximum number of jobs to allow in parallel on a backend.

  • +
+
+
+
+ +
+
+ensure_job_dir_exists(job_id)[source]
+

Create the job folder if it does not exist yet.

+
+
Return type:
+

Path

+
+
+
+ +
+
+get_error_log_path(job_id)[source]
+

Path where error log file for the job is saved.

+
+
Return type:
+

Path

+
+
+
+ +
+
+get_job_dir(job_id)[source]
+

Path to directory where job metadata, results and error logs are saved.

+
+
Return type:
+

Path

+
+
+
+ +
+
+get_job_metadata_path(job_id)[source]
+

Path where job metadata file is saved.

+
+
Return type:
+

Path

+
+
+
+ +
+
+on_job_cancel(job, row)[source]
+

Handle a job that was cancelled. Can be overridden to provide custom behaviour.

+

Default implementation does not do anything.

+
+
Parameters:
+
    +
  • job (BatchJob) – The job that was canceled.

  • +
  • row – DataFrame row containing the job’s metadata.

  • +
+
+
+
+ +
+
+on_job_done(job, row)[source]
+

Handles jobs that have finished. Can be overridden to provide custom behaviour.

+

Default implementation downloads the results into a folder containing the title.

+
+
Parameters:
+
    +
  • job (BatchJob) – The job that has finished.

  • +
  • row – DataFrame row containing the job’s metadata.

  • +
+
+
+
+ +
+
+on_job_error(job, row)[source]
+

Handles jobs that stopped with errors. Can be overridden to provide custom behaviour.

+

Default implementation writes the error logs to a JSON file.

+
+
Parameters:
+
    +
  • job (BatchJob) – The job that stopped with an error.

  • +
  • row – DataFrame row containing the job’s metadata.

  • +
+
+
+
+ +
+
+run_jobs(df=None, start_job=<function _start_job_default>, job_db=None, **kwargs)[source]
+

Runs jobs, specified in a dataframe, and tracks parameters.

+
+
Parameters:
+
    +
  • df (Optional[DataFrame]) – DataFrame that specifies the jobs, and tracks the jobs’ statuses. If None, the job_db has to be specified and will be used.

  • +
  • start_job (Callable[[], BatchJob]) –

    A callback which will be invoked with, amongst others, +the row of the dataframe for which a job should be created and/or started. +This callable should return an openeo.rest.job.BatchJob object.

    +

    The following parameters will be passed to start_job:

    +
    +
    +
    row (pandas.Series):

    The row in the pandas dataframe that stores the job’s state and other tracked data.

    +
    +
    connection_provider:

    A getter to get a connection by backend name. +Typically, you would need either the parameter connection_provider, +or the parameter connection, but likely you will not need both.

    +
    +
    connection (Connection):

    The Connection itself, that has already been created. +Typically, you would need either the parameter connection_provider, +or the parameter connection, but likely you will not need both.

    +
    +
    provider (str):

    The name of the backend that will run the job.

    +
    +
    +
    +

    You do not have to define all the parameters described above, but if you leave +any of them out, then remember to include the *args and **kwargs parameters. +Otherwise you will have an exception because run_jobs() passes unknown parameters to start_job.

    +

  • +
  • job_db (Union[str, Path, JobDatabaseInterface, None]) –

    Job database to load/store existing job status data and other metadata from/to. +Can be specified as a path to CSV or Parquet file, +or as a custom database object following the JobDatabaseInterface interface.

    +
    +

    Note

    +

    Support for Parquet files depends on the pyarrow package +as optional dependency.

    +
    +

  • +
+
+
Return type:
+

dict

+
+
Returns:
+

dictionary with stats collected during the job running loop. +Note that the set of fields in this dictionary is experimental +and subject to change

+
+
+
+

Changed in version 0.31.0: Added support for persisting the job metadata in Parquet format.

+
+
+

Changed in version 0.31.0: Replace output_file argument with job_db argument, +which can be a path to a CSV or Parquet file, +or a user-defined JobDatabaseInterface object. +The deprecated output_file argument is still supported for now.

+
+
+

Changed in version 0.33.0: return a stats dictionary

+
+
+ +
+
+start_job_thread(start_job, job_db)[source]
+

Start running the jobs in a separate thread, returns afterwards.

+
+
Parameters:
+
    +
  • start_job (Callable[[], BatchJob]) –

    A callback which will be invoked with, amongst others, +the row of the dataframe for which a job should be created and/or started. +This callable should return an openeo.rest.job.BatchJob object.

    +

    The following parameters will be passed to start_job:

    +
    +
    +
    row (pandas.Series):

    The row in the pandas dataframe that stores the job’s state and other tracked data.

    +
    +
    connection_provider:

    A getter to get a connection by backend name. +Typically, you would need either the parameter connection_provider, +or the parameter connection, but likely you will not need both.

    +
    +
    connection (Connection):

    The Connection itself, that has already been created. +Typically, you would need either the parameter connection_provider, +or the parameter connection, but likely you will not need both.

    +
    +
    provider (str):

    The name of the backend that will run the job.

    +
    +
    +
    +

    You do not have to define all the parameters described above, but if you leave +any of them out, then remember to include the *args and **kwargs parameters. +Otherwise you will have an exception because run_jobs() passes unknown parameters to start_job.

    +

  • +
  • job_db (JobDatabaseInterface) –

    Job database to load/store existing job status data and other metadata from/to. +Can be specified as a path to CSV or Parquet file, +or as a custom database object following the JobDatabaseInterface interface.

    +
    +

    Note

    +

    Support for Parquet files depends on the pyarrow package +as optional dependency.

    +
    +

  • +
+
+
+
+

Added in version 0.32.0.

+
+
+ +
+
+stop_job_thread(timeout_seconds=<object object>)[source]
+

Stop the job polling thread.

+
+
Parameters:
+

timeout_seconds (Optional[float]) – The time to wait for the thread to stop. +By default, it will wait for 2 times the poll_sleep time. +Set to None to wait indefinitely.

+
+
+
+

Added in version 0.32.0.

+
+
+ +
+ +
+
+class openeo.extra.job_management.JobDatabaseInterface[source]
+

Interface for a database of job metadata to use with the MultiBackendJobManager, +allowing to regularly persist the job metadata while polling the job statuses +and resume/restart the job tracking after it was interrupted.

+
+

Added in version 0.31.0.

+
+
+
+abstract count_by_status(statuses=())[source]
+

Retrieve the number of jobs per status.

+
+
Parameters:
+

statuses (Iterable[str]) – List/set of statuses to include. If empty, all statuses are included.

+
+
Return type:
+

dict

+
+
Returns:
+

dictionary with status as key and the count as value.

+
+
+
+ +
+
+abstract exists()[source]
+

Does the job database already exist, to read job data from?

+
+
Return type:
+

bool

+
+
+
+ +
+
+abstract get_by_status(statuses, max=None)[source]
+

Returns a dataframe with jobs, filtered by status.

+
+
Parameters:
+
    +
  • statuses (List[str]) – List of statuses to include.

  • +
  • max – Maximum number of jobs to return.

  • +
+
+
Return type:
+

DataFrame

+
+
Returns:
+

DataFrame with jobs filtered by status.

+
+
+
+ +
+
+abstract persist(df)[source]
+

Store job data to the database. +The provided dataframe may contain partial information, which is merged into the larger database.

+
+
Parameters:
+

df (DataFrame) – job data to store.

+
+
+
+ +
+ +
+
+class openeo.extra.job_management.CsvJobDatabase(path)[source]
+

Persist/load job metadata with a CSV file.

+
+
Implements:
+

JobDatabaseInterface

+
+
Parameters:
+

path (Union[str, Path]) – Path to local CSV file.

+
+
+
+

Note

+

Support for GeoPandas dataframes depends on the geopandas package +as optional dependency.

+
+
+

Added in version 0.31.0.

+
+
+ +
+
+class openeo.extra.job_management.ParquetJobDatabase(path)[source]
+

Persist/load job metadata with a Parquet file.

+
+
Implements:
+

JobDatabaseInterface

+
+
Parameters:
+

path (Union[str, Path]) – Path to the Parquet file.

+
+
+
+

Note

+

Support for Parquet files depends on the pyarrow package +as optional dependency.

+

Support for GeoPandas dataframes depends on the geopandas package +as optional dependency.

+
+
+

Added in version 0.31.0.

+
+
+ +
+
+class openeo.extra.job_management.ProcessBasedJobCreator(*, process_id=None, namespace=None, parameter_defaults=None, parameter_column_map=None)[source]
+

Batch job creator +(to be used together with MultiBackendJobManager) +that takes a parameterized openEO process definition +(e.g. a user-defined process (UDP) or a remote openEO process definition), +and creates a batch job +for each row of the dataframe managed by the MultiBackendJobManager +by filling in the process parameters with corresponding row values.

+
+

See also

+

See Job creation based on parameterized processes +for more information and examples.

+
+

Process parameters are linked to dataframe columns by name. +While this intuitive name-based matching should cover most use cases, +there are additional options for overrides or fallbacks:

+
    +
  • When provided, parameter_column_map will be consulted +for resolving a process parameter name (key in the dictionary) +to a desired dataframe column name (corresponding value).

  • +
  • One common case is handled automatically as convenience functionality.

    +

    When:

    +
      +
    • parameter_column_map is not provided (or set to None),

    • +
    • and there is a single parameter that accepts inline GeoJSON geometries,

    • +
    • and the dataframe is a GeoPandas dataframe with a single geometry column,

    • +
    +

    then this parameter and this geometries column will be linked automatically.

    +
  • +
  • If a parameter can not be matched with a column by name as described above, +a default value will be picked, +first by looking in parameter_defaults (if provided), +and then by looking up the default value from the parameter schema in the process definition.

  • +
  • Finally if no (default) value can be determined and the parameter +is not flagged as optional, an error will be raised.

  • +
+
+
Parameters:
+
    +
  • process_id (Optional[str]) – (optional) openEO process identifier. +Can be omitted when working with a remote process definition +that is fully defined with a URL in the namespace parameter.

  • +
  • namespace (Optional[str]) – (optional) openEO process namespace. +Typically used to provide a URL to a remote process definition.

  • +
  • parameter_defaults (Optional[dict]) – (optional) default values for process parameters, +to be used when not available in the dataframe managed by +MultiBackendJobManager.

  • +
  • parameter_column_map (Optional[dict]) – Optional overrides +for linking process parameters to dataframe columns: +mapping of process parameter names as key +to dataframe column names as value.

  • +
+
+
+
+

Added in version 0.33.0.

+
+
+

Warning

+

This is an experimental API subject to change, +and we greatly welcome +feedback and suggestions for improvement.

+
+
+
+__call__(*arg, **kwargs)[source]
+

Syntactic sugar for calling start_job().

+
+
Return type:
+

BatchJob

+
+
+
+ +
+
+start_job(row, connection, **_)[source]
+

Implementation of the start_job callable interface +of MultiBackendJobManager.run_jobs() +to create a job based on given dataframe row

+
+
Parameters:
+
    +
  • row (Series) – The row in the pandas dataframe that stores the jobs state and other tracked data.

  • +
  • connection (Connection) – The connection to the backend.

  • +
+
+
Return type:
+

BatchJob

+
+
+
+ +
+ +
+
+

Job creation based on parameterized processes

+

The openEO API supports parameterized processes out of the box, +which allows you to work with flexible, reusable openEO building blocks +in the form of user-defined processes +or remote openEO process definitions. +This can also be leveraged for job creation in the context of the +MultiBackendJobManager: +define a “template” job as a parameterized process +and let the job manager fill in the parameters +from a given data frame.

+

The ProcessBasedJobCreator helper class +allows you to do exactly that. +Given a reference to a parameterized process, +such as a user-defined process or remote process definition, +it can be used directly as start_job callable to +run_jobs() +which will fill in the process parameters from the dataframe.

+
+

Basic ProcessBasedJobCreator example

+

Basic usage example with a remote process definition:

+
+
Basic ProcessBasedJobCreator example snippet
+
 1from openeo.extra.job_management import (
+ 2    MultiBackendJobManager,
+ 3    create_job_db,
+ 4    ProcessBasedJobCreator,
+ 5)
+ 6
+ 7# Job creator, based on a parameterized openEO process
+ 8# (specified by the remote process definition at given URL)
+ 9# which has parameters "start_date" and "bands" for example.
+10job_starter = ProcessBasedJobCreator(
+11    namespace="https://example.com/my_process.json",
+12    parameter_defaults={
+13        "bands": ["B02", "B03"],
+14    },
+15)
+16
+17# Initialize job database from a dataframe,
+18# with desired parameter values to fill in.
+19df = pd.DataFrame({
+20    "start_date": ["2021-01-01", "2021-02-01", "2021-03-01"],
+21})
+22job_db = create_job_db("jobs.csv").initialize_from_df(df)
+23
+24# Create and run job manager,
+25# which will start a job for each of the `start_date` values in the dataframe
+26# and use the default band list ["B02", "B03"] for the "bands" parameter.
+27job_manager = MultiBackendJobManager(...)
+28job_manager.run_jobs(job_db=job_db, start_job=job_starter)
+
+
+
+

In this example, a ProcessBasedJobCreator is instantiated +based on a remote process definition, +which has parameters start_date and bands. +When passed to run_jobs(), +a job for each row in the dataframe will be created, +with parameter values based on matching columns in the dataframe:

+
    +
  • the start_date parameter will be filled in +with the values from the “start_date” column of the dataframe,

  • +
  • the bands parameter has no corresponding column in the dataframe, +and will get its value from the default specified in the parameter_defaults argument.

  • +
+
+
+

ProcessBasedJobCreator with geometry handling

+

Apart from the intuitive name-based parameter-column linking, +ProcessBasedJobCreator +also automatically links:

+
    +
  • a process parameter that accepts inline GeoJSON geometries/features +(which practically means it has a schema like {"type": "object", "subtype": "geojson"}, +as produced by Parameter.geojson).

  • +
  • with the geometry column in a GeoPandas dataframe.

  • +
+

even if the name of the parameter does not exactly match +the name of the GeoPandas geometry column (geometry by default). +This automatic linking is only done if there is only one +GeoJSON parameter and one geometry column in the dataframe.

+
+

to do

+

Add example with geometry handling.

+
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/cookbook/localprocessing.html b/cookbook/localprocessing.html new file mode 100644 index 000000000..ac13ceff8 --- /dev/null +++ b/cookbook/localprocessing.html @@ -0,0 +1,307 @@ + + + + + + + + Client-side (local) processing — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Client-side (local) processing

+
+

Warning

+

This is a new experimental feature and API, subject to change.

+
+
+

Background

+

The client-side processing functionality allows you to test and use openEO with its processes locally, i.e. without any connection to an openEO back-end. +It relies on the projects openeo-pg-parser-networkx, which provides an openEO process graph parsing tool, and openeo-processes-dask, which provides an Xarray and Dask implementation of most openEO processes.

+
+
+

Installation

+
+

Note

+

This feature requires Python>=3.9. +Tested with openeo-pg-parser-networkx==2023.5.1 and +openeo-processes-dask==2023.7.1.

+
+
pip install openeo[localprocessing]
+
+
+
+
+

Usage

+

Every openEO process graph relies on data which is typically provided by a cloud infrastructure (the openEO back-end). +The client-side processing adds the possibility to read and use local netCDFs, geoTIFFs, ZARR files, and remote STAC Collections or Items for your experiments.

+
+

STAC Collections and Items

+
+

Warning

+

The provided examples using STAC rely on third party STAC Catalogs, we can’t guarantee that the urls will remain valid.

+
+

With the load_stac process it’s possible to load and use data provided by remote or local STAC Collections or Items. +The following code snippet loads Sentinel-2 L2A data from a public STAC Catalog, using specific spatial and temporal extent, band name and also properties for cloud coverage.

+
>>> from openeo.local import LocalConnection
+>>> local_conn = LocalConnection("./")
+
+>>> url = "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a"
+>>> spatial_extent = {"west": 11, "east": 12, "south": 46, "north": 47}
+>>> temporal_extent = ["2019-01-01", "2019-06-15"]
+>>> bands = ["red"]
+>>> properties = {"eo:cloud_cover": dict(lt=50)}
+>>> s2_cube = local_conn.load_stac(url=url,
+...    spatial_extent=spatial_extent,
+...    temporal_extent=temporal_extent,
+...    bands=bands,
+...    properties=properties,
+... )
+>>> s2_cube.execute()
+<xarray.DataArray 'stackstac-08730b1b5458a4ed34edeee60ac79254' (time: 177,
+                                                                band: 1,
+                                                                y: 11354,
+                                                                x: 8025)>
+dask.array<getitem, shape=(177, 1, 11354, 8025), dtype=float64, chunksize=(1, 1, 1024, 1024), chunktype=numpy.ndarray>
+Coordinates: (12/53)
+  * time                                     (time) datetime64[ns] 2019-01-02...
+    id                                       (time) <U24 'S2B_32TPR_20190102_...
+  * band                                     (band) <U3 'red'
+  * x                                        (x) float64 6.52e+05 ... 7.323e+05
+  * y                                        (y) float64 5.21e+06 ... 5.096e+06
+    s2:product_uri                           (time) <U65 'S2B_MSIL2A_20190102...
+    ...                                       ...
+    raster:bands                             object {'nodata': 0, 'data_type'...
+    gsd                                      int32 10
+    common_name                              <U3 'red'
+    center_wavelength                        float64 0.665
+    full_width_half_max                      float64 0.038
+    epsg                                     int32 32632
+Attributes:
+    spec:        RasterSpec(epsg=32632, bounds=(600000.0, 4990200.0, 809760.0...
+    crs:         epsg:32632
+    transform:   | 10.00, 0.00, 600000.00|\n| 0.00,-10.00, 5300040.00|\n| 0.0...
+    resolution:  10.0
+
+
+
+
+

Local Collections

+

If you want to use our sample data, please clone this repository:

+
git clone https://github.com/Open-EO/openeo-localprocessing-data.git
+
+
+

With some sample data we can now check the STAC metadata for the local files by doing:

+
from openeo.local import LocalConnection
+local_data_folders = [
+    "./openeo-localprocessing-data/sample_netcdf",
+    "./openeo-localprocessing-data/sample_geotiff",
+]
+local_conn = LocalConnection(local_data_folders)
+local_conn.list_collections()
+
+
+

This code will parse the metadata content of each netCDF, geoTIFF or ZARR file in the provided folders and return a JSON object containing the STAC representation of the metadata. +If this code is run in a Jupyter Notebook, the metadata will be rendered nicely.

+
+

Tip

+

The code expects local files to have a similar structure to the sample files +provided at github.com/Open-EO/openeo-localprocessing-data. +If the code can not handle your special netCDF, +you can still modify the function that reads the metadata from it (openeo/local/collections.py#L19) +and the function that reads the data (openeo/local/processing.py#L26).

+
+
+
+

Local Processing

+

Let’s start with the provided sample netCDF of Sentinel-2 data:

+
>>> local_collection = "openeo-localprocessing-data/sample_netcdf/S2_L2A_sample.nc"
+>>> s2_datacube = local_conn.load_collection(local_collection)
+>>> # Check if the data is loaded correctly
+>>> s2_datacube.execute()
+<xarray.DataArray (bands: 5, t: 12, y: 705, x: 935)>
+dask.array<stack, shape=(5, 12, 705, 935), dtype=float32, chunksize=(1, 12, 705, 935), chunktype=numpy.ndarray>
+Coordinates:
+  * t        (t) datetime64[ns] 2022-06-02 2022-06-05 ... 2022-06-27 2022-06-30
+  * x        (x) float64 6.75e+05 6.75e+05 6.75e+05 ... 6.843e+05 6.843e+05
+  * y        (y) float64 5.155e+06 5.155e+06 5.155e+06 ... 5.148e+06 5.148e+06
+    crs      |S1 ...
+  * bands    (bands) object 'B04' 'B03' 'B02' 'B08' 'SCL'
+Attributes:
+    Conventions:  CF-1.9
+    institution:  openEO platform - Geotrellis backend: 0.9.5a1
+    description:
+    title:
+
+
+

As you can see in the previous example, we are using a call to execute() which will execute locally the generated openEO process graph. +In this case, the process graph consists only of a single load_collection, which performs lazy loading of the data. With this first step you can check if the data is being read correctly by openEO.

+

Looking at the metadata of this netCDF sample, we can see that it contains the bands B04, B03, B02, B08 and SCL. +Additionally, we also see that it is composed of more than one element in time and that it covers the month of June 2022.

+

We can now do a simple processing for demo purposes, let’s compute the median NDVI in time and visualize the result:

+
b04 = s2_datacube.band("B04")
+b08 = s2_datacube.band("B08")
+ndvi = (b08 - b04) / (b08 + b04)
+ndvi_median = ndvi.reduce_dimension(dimension="t", reducer="median")
+result_ndvi = ndvi_median.execute()
+result_ndvi.plot.imshow(cmap="Greens")
+
+
+../_images/local_ndvi.jpg +

We can perform the same example using data provided by a STAC Collection:

+
from openeo.local import LocalConnection
+local_conn = LocalConnection("./")
+
+url = "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a"
+spatial_extent =  {"east": 11.40, "north": 46.52, "south": 46.46, "west": 11.25}
+temporal_extent = ["2022-06-01", "2022-06-30"]
+bands = ["red", "nir"]
+properties = {"eo:cloud_cover": dict(lt=80)}
+s2_datacube = local_conn.load_stac(
+    url=url,
+    spatial_extent=spatial_extent,
+    temporal_extent=temporal_extent,
+    bands=bands,
+    properties=properties,
+)
+
+b04 = s2_datacube.band("red")
+b08 = s2_datacube.band("nir")
+ndvi = (b08 - b04) / (b08 + b04)
+ndvi_median = ndvi.reduce_dimension(dimension="time", reducer="median")
+result_ndvi = ndvi_median.execute()
+
+
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/cookbook/sampling.html b/cookbook/sampling.html new file mode 100644 index 000000000..9b34129a9 --- /dev/null +++ b/cookbook/sampling.html @@ -0,0 +1,193 @@ + + + + + + + + Dataset sampling — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Dataset sampling

+

A number of use cases do not require a full datacube to be computed, +but rather want to extract a result at specific locations. +Examples include extracting training data for model calibration, or computing the result for +areas where validation data is available.

+

An important constraint is that most implementations assume that sampling is an operation +on relatively small areas, of for instance up to 512x512 pixels (but often much smaller). +When extracting larger areas, it is recommended to look into running a separate job per ‘sample’.

+

Sampling can be done for points or polygons:

+
    +
  • point extractions basically result in a ‘vector cube’, so can be exported into tabular formats.

  • +
  • polygon extractions can be stored to an individual netCDF per polygon so in this case the output is a sparse raster cube.

  • +
+

To indicate to openEO that we only want to compute the datacube for certain polygon features, we use the +openeo.rest.datacube.DataCube.filter_spatial method.

+

Next to that, we will also indicate that we want to write multiple output files. This is more convenient, as we will +want to have one or more raster outputs per sampling feature, for convenient further processing. To do this, we set +the ‘sample_by_feature’ output format property, which is available for the netCDF and GTiff output formats.

+

Combining all of this, results in the following sample code:

+
s2_bands = auth_connection.load_collection(
+    "SENTINEL2_L2A",
+    bands=["B04"],
+    temporal_extent=["2020-05-01", "2020-06-01"],
+)
+s2_bands = s2_bands.filter_spatial(
+    "https://artifactory.vgt.vito.be/testdata-public/parcels/test_10.geojson",
+)
+job = s2_bands.create_job(
+    title="Sentinel2",
+    description="Sentinel-2 L2A bands",
+    out_format="netCDF",
+    sample_by_feature=True,
+)
+
+
+

Sampling only works for batch jobs, because it results in multiple output files, which can not be conveniently transferred +in a synchronous call.

+
+

Performance & scalability

+

It’s important to note that dataset sampling is not necessarily a cheap operation, since creation of a sparse datacube still +may require accessing a large number of raw EO assets. Backends of course can and should optimize to restrict processing +to a minimum, but the size of the required input datasets is often a determining factor for cost and performance rather +than the size of the output dataset.

+
+
+

Sampling at scale

+

When doing large scale (e.g. continental) sampling, it is usually impossible or impractical to run it as a single openEO +batch job. The recommendation here is to apply a spatial grouping to your sampling locations, with a single group covering +an area of around 100x100km. The optimal size of a group may be backend dependent. Also remember that when working with +data in the UTM projection, you may want to avoid covering multiple UTM zones in a single group.

+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/cookbook/spectral_indices.html b/cookbook/spectral_indices.html new file mode 100644 index 000000000..7e2f3e530 --- /dev/null +++ b/cookbook/spectral_indices.html @@ -0,0 +1,450 @@ + + + + + + + + Spectral Indices — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Spectral Indices

+
+

Warning

+

This is a new experimental API, subject to change.

+
+

openeo.extra.spectral_indices is an auxiliary subpackage +to simplify the calculation of common spectral indices +used in various Earth observation applications (vegetation, water, urban etc.). +It leverages the spectral indices defined in the +Awesome Spectral Indices project +by David Montero Loaiza.

+
+

Added in version 0.9.1.

+
+
+

Band mapping

+

The formulas provided by “Awesome Spectral Indices” are defined in terms of standardized variable names +like “B” for blue, “R” for red, “N” for near-infrared, “WV” for water vapour, etc.

+
"NDVI": {
+     "formula": "(N - R)/(N + R)",
+     "long_name": "Normalized Difference Vegetation Index",
+
+
+

Obviously, these formula variables have to be mapped properly to the band names of your cube.

+
+

Automatic band mapping

+

In most simple cases, when there is enough collection metadata +to automatically detect the satellite platform (Sentinel2, Landsat8, ..) +and the original band names haven’t been renamed, +this mapping will be handled automatically, e.g.:

+
cube = connection.load_collection("SENTINEL2_L2A", ...)
+indices = compute_indices(cube, indices=["NDVI", "NDMI"])
+
+
+
+
+

Manual band mapping

+

In more complex cases, it might be necessary to specify some additional information to guide the band mapping. +If the band names follow the standard, but it’s just that the satellite platform can not be guessed +from the collection metadata, it is typically enough to specify the platform explicitly:

+
indices = compute_indices(
+    cube,
+    indices=["NDVI", "NDMI"],
+    platform="SENTINEL2",
+)
+
+
+

Additionally, if the band names in your cube have been renamed, deviating from conventions, it is also +possible to explicitly specify the band name to spectral index variable name mapping:

+
indices = compute_indices(
+    cube,
+    indices=["NDVI", "NDMI"],
+    variable_map={
+        "R": "S2-red",
+        "N": "S2-nir",
+        "S1": "S2-swir",
+    },
+)
+
+
+
+

Added in version 0.26.0: Function arguments platform and variable_map to fine-tune the band mapping.

+
+
+
+
+

API

+
+
+openeo.extra.spectral_indices.append_and_rescale_indices(datacube, index_dict, *, variable_map=None, platform=None)[source]
+

Computes a list of indices from a datacube and appends them to the existing datacube

+
+
Parameters:
+
    +
  • datacube (DataCube) – input data cube

  • +
  • index_dict (dict) –

    a dictionary that contains the input- and output range of the collection on which you calculate the indices +as well as the indices that you want to calculate with their corresponding input- and output ranges. +It follows the following format:

    +
    {
    +    "collection": {
    +        "input_range": [0,8000],
    +        "output_range": [0,250]
    +    },
    +    "indices": {
    +        "NDVI": {
    +            "input_range": [-1,1],
    +            "output_range": [0,250]
    +        },
    +    }
    +}
    +
    +
    +

    See list_indices() for supported indices.

    +

  • +
  • variable_map (Optional[Dict[str, str]]) – (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names. +To be specified if the given data cube has non-standard band names, +or the satellite platform can not be recognized from the data cube metadata. +See Manual band mapping for more information.

  • +
  • platform (Optional[str]) – optionally specify the satellite platform (to determine band name mapping) +if the given data cube has no or an unhandled collection id in its metadata. +See Manual band mapping for more information.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

data cube with appended indices

+
+
+
+

Warning

+

this “rescaled” index helper uses an experimental API (e.g. index_dict argument) that is subject to change.

+
+
+

Added in version 0.26.0: Added variable_map and platform arguments.

+
+
+ +
+
+openeo.extra.spectral_indices.append_index(datacube, index, *, variable_map=None, platform=None)[source]
+

Compute a single spectral index and append it to the given data cube.

+
+
Parameters:
+
    +
  • datacube (DataCube) – input data cube

  • +
  • index (str) – name of the index to compute and append. See list_indices() for supported indices.

  • +
  • variable_map (Optional[Dict[str, str]]) – (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names. +To be specified if the given data cube has non-standard band names, +or the satellite platform can not be recognized from the data cube metadata. +See Manual band mapping for more information.

  • +
  • platform (Optional[str]) – optionally specify the satellite platform (to determine band name mapping) +if the given data cube has no or an unhandled collection id in its metadata. +See Manual band mapping for more information.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

data cube with appended index

+
+
+
+

Added in version 0.26.0: Added variable_map and platform arguments.

+
+
+ +
+
+openeo.extra.spectral_indices.append_indices(datacube, indices, *, variable_map=None, platform=None)[source]
+

Compute multiple spectral indices and append them to the given data cube.

+
+
Parameters:
+
    +
  • datacube (DataCube) – input data cube

  • +
  • indices (List[str]) – list of names of the indices to compute and append. See list_indices() for supported indices.

  • +
  • variable_map (Optional[Dict[str, str]]) – (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names. +To be specified if the given data cube has non-standard band names, +or the satellite platform can not be recognized from the data cube metadata. +See Manual band mapping for more information.

  • +
  • platform (Optional[str]) – optionally specify the satellite platform (to determine band name mapping) +if the given data cube has no or an unhandled collection id in its metadata. +See Manual band mapping for more information.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

data cube with appended indices

+
+
+
+

Added in version 0.26.0: Added variable_map and platform arguments.

+
+
+ +
+
+openeo.extra.spectral_indices.compute_and_rescale_indices(datacube, index_dict, *, append=False, variable_map=None, platform=None)[source]
+

Computes a list of indices from a data cube

+
+
Parameters:
+
    +
  • datacube (DataCube) – input data cube

  • +
  • index_dict (dict) –

    a dictionary that contains the input- and output range of the collection on which you calculate the indices +as well as the indices that you want to calculate with their corresponding input- and output ranges. +It follows the following format:

    +
    {
    +    "collection": {
    +        "input_range": [0,8000],
    +        "output_range": [0,250]
    +    },
    +    "indices": {
    +        "NDVI": {
    +            "input_range": [-1,1],
    +            "output_range": [0,250]
    +        },
    +    }
    +}
    +
    +
    +

    If you don’t want to rescale your data, you can fill the input-, index- and output-range with None.

    +

    See list_indices() for supported indices.

    +

  • +
  • append (bool) – append the indices as bands to the given data cube +instead of creating a new cube with only the calculated indices

  • +
  • variable_map (Optional[Dict[str, str]]) – (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names. +To be specified if the given data cube has non-standard band names, +or the satellite platform can not be recognized from the data cube metadata. +See Manual band mapping for more information.

  • +
  • platform (Optional[str]) – optionally specify the satellite platform (to determine band name mapping) +if the given data cube has no or an unhandled collection id in its metadata. +See Manual band mapping for more information.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

the datacube with the indices attached as bands

+
+
+
+

Warning

+

this “rescaled” index helper uses an experimental API (e.g. index_dict argument) that is subject to change.

+
+
+

Added in version 0.26.0: Added variable_map and platform arguments.

+
+
+ +
+
+openeo.extra.spectral_indices.compute_index(datacube, index, *, variable_map=None, platform=None)[source]
+

Compute a single spectral index from a data cube.

+
+
Parameters:
+
    +
  • datacube (DataCube) – input data cube

  • +
  • index (str) – name of the index to compute. See list_indices() for supported indices.

  • +
  • variable_map (Optional[Dict[str, str]]) – (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names. +To be specified if the given data cube has non-standard band names, +or the satellite platform can not be recognized from the data cube metadata. +See Manual band mapping for more information.

  • +
  • platform (Optional[str]) – optionally specify the satellite platform (to determine band name mapping) +if the given data cube has no or an unhandled collection id in its metadata. +See Manual band mapping for more information.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

data cube containing the index as band

+
+
+
+

Added in version 0.26.0: Added variable_map and platform arguments.

+
+
+ +
+
+openeo.extra.spectral_indices.compute_indices(datacube, indices, *, append=False, variable_map=None, platform=None)[source]
+

Compute multiple spectral indices from the given data cube.

+
+
Parameters:
+
    +
  • datacube (DataCube) – input data cube

  • +
  • indices (List[str]) – list of names of the indices to compute and append. See list_indices() for supported indices.

  • +
  • append (bool) – append the indices as bands to the given data cube +instead of creating a new cube with only the calculated indices

  • +
  • variable_map (Optional[Dict[str, str]]) – (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names. +To be specified if the given data cube has non-standard band names, +or the satellite platform can not be recognized from the data cube metadata. +See Manual band mapping for more information.

  • +
  • platform (Optional[str]) – optionally specify the satellite platform (to determine band name mapping) +if the given data cube has no or an unhandled collection id in its metadata. +See Manual band mapping for more information.

  • +
+
+
Return type:
+

DataCube

+
+
Returns:
+

data cube containing the indices as bands

+
+
+
+

Added in version 0.26.0: Added variable_map and platform arguments.

+
+
+ +
+
+openeo.extra.spectral_indices.list_indices()[source]
+

List names of supported spectral indices

+
+
Return type:
+

List[str]

+
+
+
+ +
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/cookbook/tricks.html b/cookbook/tricks.html new file mode 100644 index 000000000..59eaf79b6 --- /dev/null +++ b/cookbook/tricks.html @@ -0,0 +1,245 @@ + + + + + + + + Miscellaneous tips and tricks — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Miscellaneous tips and tricks

+
+

Export a process graph

+

You can export the underlying process graph of +a DataCube, VectorCube, etc, +to a standardized JSON format, which allows interoperability with other openEO tools.

+

For example, use print_json() to directly print the JSON representation +in your interactive Jupyter or Python session:

+
>>> dump = cube.print_json()
+{
+  "process_graph": {
+    "loadcollection1": {
+      "process_id": "load_collection",
+...
+
+
+

Or save it to a file, by getting the JSON representation first as a string +with to_json():

+
# Export as JSON string
+dump = cube.to_json()
+
+# Write to file in `pathlib` style
+export_path = pathlib.Path("path/to/export.json")
+export_path.write_text(dump, encoding="utf8")
+
+# Write to file in `open()` style
+with open("path/to/export.json", mode="w", encoding="utf8") as f:
+    f.write(dump)
+
+
+
+

Warning

+

Avoid using methods like flat_graph(), +which are mainly intended for internal use. +Not only are these methods subject to change, they also lead to representations +with interoperability and reuse issues. +For example, naively printing or automatic (repr) rendering of +flat_graph() output will roughly look like JSON, +but is in fact invalid: it uses single quotes (instead of double quotes) +and boolean values are title-case (instead of lower case).

+
+
+
+

Execute a process graph directly from raw JSON

+

When you have a process graph in JSON format, as a string, a local file or a URL, +you can execute/download it without converting it to a DataCube first. +Just pass the string, path or URL directly to +Connection.download(), +Connection.execute() or +Connection.create_job(). +For example:

+
# `execute` with raw JSON string
+connection.execute("""
+    {
+        "add": {"process_id": "add", "arguments": {"x": 3, "y": 5}, "result": true}
+    }
+""")
+
+# `download` with local path to JSON file
+connection.download("path/to/my-process-graph.json")
+
+# `create_job` with URL to JSON file
+job = connection.create_job("https://jsonbin.example/my/process-graph.json")
+
+
+
+
+

Legacy read_vector usage

+

In versions up to 0.35.0 of the openEO Python client library, +there was an old, deprecated feature in geometry handling +of DataCube methods like +aggregate_spatial() and +mask_polygon() +where you could pass a backend-side path as geometries, e.g.:

+
cube = cube.aggregate_spatial(
+    geometries="/backend/path/to/geometries.json",
+    reducer="mean"
+)
+
+
+

The client would handle this by automatically adding a read_vector process +in the process graph, with that path as argument, to instruct the backend to load the geometries from there. +This read_vector process was however a backend-specific, experimental and now deprecated process. +Moreover, it assumes that the user has access to (or at least knowledge of) the backend’s file system, +which violates the openEO principle of abstracting away backend-specific details.

+

In version 0.36.0, this old deprecated read_vector feature has been removed, +to allow other and better convenience functionality +when providing a string in the geometries argument: +e.g. load from a URL with standard process load_url, +or load GeoJSON from a local clientside path.

+

If your workflow however depends on the old, deprecated read_vector functionality, +it is possible to reconstruct that by manually adding a read_vector process in your workflow, +for example as follows:

+
from openeo.processes import process
+
+cube = cube.aggregate_spatial(
+    geometries=process("read_vector", filename="/backend/path/to/geometries.json"),
+    reducer="mean"
+)
+
+
+

Note that this also works with older versions of the openEO Python client library.

+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/cookbook/udp_sharing.html b/cookbook/udp_sharing.html new file mode 100644 index 000000000..fe42be311 --- /dev/null +++ b/cookbook/udp_sharing.html @@ -0,0 +1,255 @@ + + + + + + + + Sharing of user-defined processes — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Sharing of user-defined processes

+
+

Warning

+

Beta feature - +At the time of this writing (July 2021), sharing of user-defined processes +(publicly or among users) is not standardized in the openEO API. +There are however some experimental sharing features in the openEO Python Client Library +and some back-end providers that we are going to discuss here.

+

Be warned that the details of this feature are subject to change. +For more status information, consult GitHub ticket +Open-EO/openeo-api#310.

+
+
+

Publicly publishing a user-defined process.

+

As discussed in Building and storing user-defined process, user-defined processes can be +stored with the save_user_defined_process() method +on a back-end Connection. +By default, these user-defined processes are private and only accessible by the user that saved them:

+
from openeo.processes import subtract, divide
+from openeo.api.process import Parameter
+
+# Build user-defined process
+f = Parameter.number("f", description="Degrees Fahrenheit.")
+fahrenheit_to_celsius = divide(x=subtract(x=f, y=32), y=1.8)
+
+# Store user-defined process in openEO back-end.
+udp = connection.save_user_defined_process(
+    "fahrenheit_to_celsius",
+    fahrenheit_to_celsius,
+    parameters=[f]
+)
+
+
+

Some back-ends, like the VITO/Terrascope back-end allow a user to flag a user-defined process as “public” +so that other users can access its description and metadata:

+
udp = connection.save_user_defined_process(
+    ...
+    public=True
+)
+
+
+

The sharable, public URL of this user-defined process is available from the metadata given by +RESTUserDefinedProcess.describe. +It’s listed as “canonical” link:

+
>>> udp.describe()
+{
+    "id": "fahrenheit_to_celsius",
+    "links": [
+        {
+            "rel": "canonical",
+            "href": "https://openeo.vito.be/openeo/1.0/processes/u:johndoe/fahrenheit_to_celsius",
+            "title": "Public URL for user-defined process fahrenheit_to_celsius"
+        }
+    ],
+    ...
+
+
+
+
+

Using a public UDP through URL based “namespace”

+

Some back-ends, like the VITO/Terrascope back-end, allow using a public UDP +by setting its public URL as the namespace property of the process graph node.

+

For example, based on the fahrenheit_to_celsius UDP created above, +the “flat graph” representation of a process graph could look like this:

+
{
+    ...
+    "to_celsius": {
+        "process_id": "fahrenheit_to_celsius",
+        "namespace": "https://openeo.vito.be/openeo/1.0/processes/u:johndoe/fahrenheit_to_celsius",
+        "arguments": {"f": 86}
+    }
+
+
+

As a very basic illustration with the openEO Python Client library, +we can create and evaluate a process graph, +containing a fahrenheit_to_celsius call as single process, +with Connection.datacube_from_process as follows:

+
cube = connection.datacube_from_process(
+    process_id="fahrenheit_to_celsius",
+    namespace="https://openeo.vito.be/openeo/1.0/processes/u:johndoe/fahrenheit_to_celsius",
+    f=86
+)
+print(cube.execute())
+# Prints: 30.0
+
+
+
+
+

Loading a published user-defined process as DataCube

+

From the public URL of the user-defined process, +it is also possible for another user to construct, fully client-side, +a new DataCube +with Connection.datacube_from_json().

+

It is important to note that this approach is different from calling +a user-defined process as described in Evaluate user-defined processes and Using a public UDP through URL based “namespace”. +Connection.datacube_from_json() +breaks open the encapsulation of the user-defined process and “unrolls” the process graph inside +into a new DataCube. +This also implies that parameters defined in the user-defined process have to be provided when calling +Connection.datacube_from_json():

+
udp_url = "https://openeo.vito.be/openeo/1.0/processes/u:johndoe/fahrenheit_to_celsius"
+cube = connection.datacube_from_json(
+    udp_url,
+    parameters={"f": 86},
+)
+print(cube.execute())
+# Prints: 30.0
+
+
+

Note that Connection.datacube_from_json() +not only supports loading UDPs from a URL but also from a raw JSON string or a local file path. +For more information, also see Construct a DataCube from JSON.

+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/data_access.html b/data_access.html new file mode 100644 index 000000000..aeb207bdc --- /dev/null +++ b/data_access.html @@ -0,0 +1,412 @@ + + + + + + + + Finding and loading data — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Finding and loading data

+

As illustrated in the basic concepts, most openEO scripts start with load_collection, but this skips the step of +actually finding out which collection to load. This section dives a bit deeper into finding the right data, and some more +advanced data loading use cases.

+
+

Data discovery

+

To explore data in a given back-end, it is recommended to use a more visual tool like the openEO Hub +(http://hub.openeo.org/). This shows available collections, and metadata in a user-friendly manner.

+

Next to that, the client also offers various Connection methods +to explore collections and their metadata:

+ +

When using these methods inside a Jupyter notebook, you should notice that the output is rendered in a user friendly way.

+

In a regular script, these methods can be used to programmatically find a collection that matches specific criteria.

+

As a user, make sure to carefully read the documentation for a given collection, as there can be important differences. +You should also be aware of the data retention policy of a given collection: some data archives only retain the last 3 months +for instance, making them only suitable for specific types of analysis. Such differences can have an impact on the reproducibility +of your openEO scripts.

+

Also note that the openEO metadata may use links to point to much more information for a particular collection. For instance +technical specification on how the data was preprocessed, or viewers that allow you to visually explore the data. This can +drastically improve your understanding of the dataset.

+

Finally, licensing information is important to keep an eye on: not all data is free and open.

+
+

Initial exploration of an openEO collection

+

A common question from users is about very specific details of a collection. We’d like to list some examples and solutions here:

+
    +
  • The collection data type, and range of values, can be determined by simply downloading a sample of data, as NetCDF or Geotiff. This can in fact be done at any point in the design of your script, to get a good idea of intermediate results.

  • +
  • Data availability, and available timestamps can be retrieved by computing average values for your area of interest. Just construct a polygon, and retrieve those statistics. For optical data, this can also be used to get an idea on cloud statistics.

  • +
  • Most collections have a native projection system; again, a simple download will give you this information if it's not clear from the metadata.

  • +
+
+
+
+

Loading a data cube from a collection

+

Many examples already illustrate the basic openEO load_collection process through a Connection.load_collection() call, +with filters on space, time and bands. +For example:

+
cube = connection.load_collection(
+    "SENTINEL2_L2A",
+    spatial_extent={"west": 3.75, "east": 4.08, "south": 51.29, "north": 51.39},
+    temporal_extent=["2021-05-07", "2021-05-14"],
+    bands=["B04", "B03", "B02"],
+)
+
+
+

The purpose of these filters in load_collection is to reduce the amount of raw data that is loaded (and processed) by the back-end. +This is essential to get a response to your processing request in reasonable time and keep processing costs low. +It’s recommended to start initial exploration with a small spatio-temporal extent +and gradually increase the scope once initial tests work out.

+

Next to specifying filters inside the load_collection process, +there are also possibilities to filter with separate filter processes, e.g. at a later stage in your process graph. +For most openEO back-ends, the following example snippet should be equivalent to the previous:

+
cube = connection.load_collection("SENTINEL2_L2A")
+cube = cube.filter_bbox(west=3.75, east=4.08, south=51.29, north=51.39)
+cube = cube.filter_temporal("2021-05-07", "2021-05-14")
+cube = cube.filter_bands(["B04", "B03", "B02"])
+
+
+

Another nice feature is that processes that work with geometries or vector features +(e.g. aggregated statistics for a polygon, or masking by polygon) +can also be used by a back-end to automatically infer an appropriate spatial extent. +This way, you do not need to explicitly set these filters yourself.

+

In the following sections, we want to dive a bit into details, and more advanced cases.

+
+
+

Filter on spatial extent

+

A spatial extent is a bounding box that specifies the minimum and maximum longitude and latitude of the region of interest you want to process.

+

By default these latitude and longitude values are expressed in the standard Coordinate Reference System for the world, +which is EPSG:4326, also known as “WGS 84”, or just “lat-long”.

+
connection.load_collection(
+    ...,
+    spatial_extent={"west": 5.14, "south": 51.17, "east": 5.17, "north": 51.19},
+)
+
+
+
+
+

Filter on temporal extent

+

Usually you don’t need the complete time range provided by a collection +and you should specify an appropriate time window to load +as a temporal_extent pair containing a start and end date:

+
connection.load_collection(
+    ...,
+    temporal_extent=["2021-05-07", "2021-05-14"],
+)
+
+
+

In most use cases, day-level granularity is enough and you can just express the dates as strings in the format "yyyy-mm-dd". +You can also pass datetime.date objects (from Python standard library) if you already have your dates in that format.

+
+

Note

+

When you need finer, time-level granularity, you can pass datetime.datetime objects. +Or, when passed as a string, the openEO API requires date and time to be provided in RFC 3339 format. +For example, for 2020-03-17 at 12:34:56 in UTC:

+
"2020-03-17T12:34:56Z"
+
+
+
+
+

Left-closed intervals: start included, end excluded

+

Time ranges in openEO processes like load_collection and filter_temporal are handled as left-closed (“half-open”) temporal intervals: +the start instant is included in the interval, but the end instant is excluded from the interval.

+

For example, the interval defined by ["2020-03-05", "2020-03-15"] covers observations +from 2020-03-05 up to (and including) 2020-03-14 (just before midnight), +but does not include observations from 2020-03-15.

+
          2020-03-05                             2020-03-14   2020-03-15
+________|____________|_________________________|____________|____________|_____
+
+        [--------------------------------------------------(O
+    including                                           excluding
+2020-03-05 00:00:00.000                             2020-03-15 00:00:00.000
+
+
+

While this might look unintuitive at first, +working with half-open intervals avoids common and hard to discover pitfalls when combining multiple intervals, +like unintended window overlaps or double counting observations at interval borders.

+
+
+

Year/month shorthand notation

+
+

Note

+

Year/month shorthand notation handling is available since version 0.23.0.

+
+
+

Rounding down periods to dates

+

The openEO Python Client Library supports some shorthand notations for the temporal extent, +which come in handy if you work with year/month based temporal intervals. +Date strings that only consist of a year or a month will be automatically +“rounded down” to the first day of that period. For example:

+
"2023"    -> "2023-01-01"
+"2023-08" -> "2023-08-01"
+
+
+

This approach fits best with left-closed interval handling.

+

For example, the following two load_collection calls are equivalent:

+
# Filter for observations in 2021 (left-closed interval).
+connection.load_collection(temporal_extent=["2021", "2022"], ...)
+# The above is shorthand for:
+connection.load_collection(temporal_extent=["2021-01-01", "2022-01-01"], ...)
+
+
+

The same applies for filter_temporal(), +which has a couple of additional call forms. +All these calls are equivalent:

+
# Filter for March, April and May (left-closed interval)
+cube = cube.filter_temporal("2021-03", "2021-06")
+cube = cube.filter_temporal(["2021-03", "2021-06"])
+cube = cube.filter_temporal(start_date="2021-03", end_date="2021-06")
+cube = cube.filter_temporal(extent=("2021-03", "2021-06"))
+
+# The above are shorthand for:
+cube = cube.filter_temporal("2021-03-01", "2021-06-01")
+
+
+
+
+

Single string temporal extents

+

Apart from rounding down year or month string, the openEO Python Client Library provides an additional +extent handling feature in methods like +Connection.load_collection(temporal_extent=...) +and DataCube.filter_temporal(extent=...). +Normally, the extent argument should be a list or tuple containing start and end date, +but if a single string is given, representing a year, month (or day) period, +it is automatically expanded to the appropriate interval, +again following the left-closed interval principle. +For example:

+
extent="2022"        ->  extent=("2022-01-01", "2023-01-01")
+extent="2022-05"     ->  extent=("2022-05-01", "2022-06-01")
+extent="2022-05-17"  ->  extent=("2022-05-17", "2022-05-18")
+
+
+

The following snippet shows some examples of equivalent calls:

+
connection.load_collection(temporal_extent="2022", ...)
+# The above is shorthand for:
+connection.load_collection(temporal_extent=("2022-01-01", "2023-01-01"), ...)
+
+
+cube = cube.filter_temporal(extent="2021-03")
+# The above is shorthand for:
+cube = cube.filter_temporal(extent=("2021-03-01", "2021-04-01"))
+
+
+
+
+
+
+

Filter on collection properties

+

Although openEO presents data in a data cube, a lot of collections are still backed by a product based catalog. This +allows filtering on properties of that catalog.

+

A very common use case is to pre-filter Sentinel-2 products on cloud cover. +This avoids loading clouded data unnecessarily and increases performance. +Connection.load_collection() provides +a dedicated max_cloud_cover argument (shortcut for the eo:cloud_cover property) for that:

+
connection.load_collection(
+    "SENTINEL2_L2A",
+    ...,
+    max_cloud_cover=80,
+)
+
+
+

For more general cases, you can use the properties argument to filter on any collection property. +For example, to filter on the relative orbit number of SAR data:

+
connection.load_collection(
+    "SENTINEL1_GRD",
+    ...,
+    properties={
+        "relativeOrbitNumber": lambda x: x==116
+    },
+)
+
+
+

Version 0.26.0 of the openEO Python Client Library adds +collection_property() +which makes defining such property filters more user-friendly by avoiding the lambda construct:

+
import openeo
+
+connection.load_collection(
+    "SENTINEL1_GRD",
+    ...,
+    properties=[
+        openeo.collection_property("relativeOrbitNumber") == 116,
+    ],
+)
+
+
+

Note that property names follow STAC metadata conventions, but some collections can have different names.

+

Property filters in openEO are also specified by small process graphs, that allow the use of the same generic processes +defined by openEO. This is the ‘lambda’ process that you see in the property dictionary. Do note that not all processes +make sense for product filtering, and can not always be properly translated into the query language of the catalog. +Hence, some experimentation may be needed to find a filter that works.

+

One important caveat in this example is that ‘relativeOrbitNumber’ is a catalog specific property name. Meaning that +different archives may choose a different name for a given property, and the properties that are available can depend +on the collection and the catalog that is used by it. This is not a problem caused by openEO, but by the limited +standardization between catalogs of EO data.

+
+
+

Handling large vector data sets

+

For simple use cases, it is common to directly embed geometries (vector data) in your openEO process graph. +Unfortunately, with large vector data sets this leads to very large process graphs +and you might hit certain limits, +resulting in HTTP errors like 413 Request Entity Too Large or 413 Payload Too Large.

+

This problem can be circumvented by first uploading your vector data to a file sharing service +(like Google Drive, DropBox, GitHub, …) +and use its public URL in the process graph instead +through Connection.vectorcube_from_paths. +For example, as follows:

+
# Load vector data from URL
+url = "https://github.com/Open-EO/openeo-python-client/raw/master/tests/data/example_aoi.pq"
+parcels = connection.vectorcube_from_paths([url], format="parquet")
+
+# Use the parcel vector data, for example to do aggregation.
+cube = connection.load_collection(
+    "SENTINEL2_L2A",
+    bands=["B04", "B03", "B02"],
+    temporal_extent=["2021-05-12", "2021-06-01"],
+)
+aggregations = cube.aggregate_spatial(
+    geometries=parcels,
+    reducer="mean",
+)
+
+
+

Note that while openEO back-ends typically support multiple vector formats, like GeoJSON and GeoParquet, +it is usually recommended to use a compact format like GeoParquet, instead of GeoJSON. The list of supported formats +is also advertised by the backend, and can be queried with +Connection.list_file_formats.

+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/datacube_construction.html b/datacube_construction.html new file mode 100644 index 000000000..920edfd23 --- /dev/null +++ b/datacube_construction.html @@ -0,0 +1,344 @@ + + + + + + + + DataCube construction — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

DataCube construction

+
+

The load_collection process

+

The most straightforward way to start building your openEO data cube is through the load_collection process. +As mentioned earlier, this is provided by the +load_collection() method +on a Connection object, +which produces a DataCube instance. +For example:

+
cube = connection.load_collection("SENTINEL2_TOC")
+
+
+

While this should cover the majority of use cases, +there are some cases +where one wants to build a DataCube object +from something else or something more than just a simple load_collection process.

+
+
+

Construct DataCube from process

+

Through user-defined processes one can encapsulate +one or more load_collection processes and additional processing steps in a single +reusable user-defined process. +For example, imagine a user-defined process “masked_s2” +that loads an openEO collection “SENTINEL2_TOC” and applies some kind of cloud masking. +The implementation details of the cloud masking are not important here, +but let’s assume there is a parameter “dilation” to fine-tune the cloud mask. +Also note that the collection id “SENTINEL2_TOC” is hardcoded in the user-defined process.

+

We can now construct a data cube from this user-defined process +with datacube_from_process() +as follows:

+
cube = connection.datacube_from_process("masked_s2", dilation=10)
+
+# Further processing of the cube:
+cube = cube.filter_temporal("2020-09-01", "2020-09-10")
+
+
+

Note that datacube_from_process() can be +used with all kinds of processes, not only user-defined processes. +For example, while this is not exactly a real EO data use case, +it will produce a valid openEO process graph that can be executed:

+
>>> cube = connection.datacube_from_process("mean", data=[2, 3, 5, 8])
+>>> cube.execute()
+4.5
+
+
+
+
+

Construct a DataCube from JSON

+

openEO process graphs are typically stored and published in JSON format. +Most notably, user-defined processes are transferred between openEO client +and back-end in a JSON structure roughly like in this example:

+
{
+  "id": "evi",
+  "parameters": [
+    {"name": "red", "schema": {"type": "number"}},
+    {"name": "blue", "schema": {"type": "number"}},
+    ...
+  ],
+  "process_graph": {
+    "sub": {"process_id": "subtract", "arguments": {"x": {"from_parameter": "nir"}, "y": {"from_parameter": "red"}}},
+    "p1": {"process_id": "multiply", "arguments": {"x": 6, "y": {"from_parameter": "red"}}},
+    "div": {"process_id": "divide", "arguments": {"x": {"from_node": "sub"}, "y": {"from_node": "sum"}}},
+    ...
+
+
+

It is possible to construct a DataCube object that corresponds with this +process graph with the Connection.datacube_from_json method. +It can be given one of:

+
+
    +
  • a raw JSON string,

  • +
  • a path to a local JSON file,

  • +
  • a URL that points to a JSON resource

  • +
+
+

The JSON structure should be one of:

+
+
    +
  • a mapping (dictionary) like the example above with at least a "process_graph" item, +and optionally a "parameters" item.

  • +
  • a mapping (dictionary) with {"process_id": ...} items

  • +
+
+
+

Some examples

+

Load a DataCube from a raw JSON string, containing a +simple “flat graph” representation:

+
raw_json = '''{
+    "lc": {"process_id": "load_collection", "arguments": {"id": "SENTINEL2_TOC"}},
+    "ak": {"process_id": "apply_kernel", "arguments": {"data": {"from_node": "lc"}, "kernel": [[1,2,1],[2,5,2],[1,2,1]]}, "result": true}
+}'''
+cube = connection.datacube_from_json(raw_json)
+
+
+

Load from a raw JSON string, containing a mapping with “process_graph” and “parameters”:

+
raw_json = '''{
+    "parameters": [
+        {"name": "kernel", "schema": {"type": "array"}, "default": [[1,2,1], [2,5,2], [1,2,1]]}
+    ],
+    "process_graph": {
+        "lc": {"process_id": "load_collection", "arguments": {"id": "SENTINEL2_TOC"}},
+        "ak": {"process_id": "apply_kernel", "arguments": {"data": {"from_node": "lc"}, "kernel": {"from_parameter": "kernel"}}, "result": true}
+    }
+}'''
+cube = connection.datacube_from_json(raw_json)
+
+
+

Load directly from a local file or URL containing these kinds of JSON representations:

+
# Local file
+cube = connection.datacube_from_json("path/to/my_udp.json")
+
+# URL
+cube = connection.datacube_from_json("https://example.com/my_udp.json")
+
+
+
+
+

Parameterization

+

When the process graph uses parameters, you must specify the desired parameter values +at the time of calling Connection.datacube_from_json.

+

For example, take this simple toy example of a process graph that takes the sum of 5 and a parameter “increment”:

+
raw_json = '''{"add": {
+    "process_id": "add",
+    "arguments": {"x": 5, "y": {"from_parameter": "increment"}},
+    "result": true
+}}'''
+
+
+

Trying to build a DataCube from it without specifying parameter values will fail +like this:

+
>>> cube = connection.datacube_from_json(raw_json)
+ProcessGraphVisitException: No substitution value for parameter 'increment'.
+
+
+

Instead, specify the parameter value:

+
>>> cube = connection.datacube_from_json(
+...    raw_json,
+...    parameters={"increment": 4},
+... )
+>>> cube.execute()
+9
+
+
+

Parameters can also be defined with default values, which will be used when they are not specified +in the Connection.datacube_from_json call:

+
raw_json = '''{
+    "parameters": [
+        {"name": "increment", "schema": {"type": "number"}, "default": 100}
+    ],
+    "process_graph": {
+        "add": {"process_id": "add", "arguments": {"x": 5, "y": {"from_parameter": "increment"}}, "result": true}
+    }
+}'''
+
+cube = connection.datacube_from_json(raw_json)
+result = cube.execute()
+# result will be 105
+
+
+
+

Re-parameterization

+

TODO

+
+
+
+
+

Building process graphs with multiple result nodes

+
+

Note

+

Multi-result support is added in version 0.35.0

+
+

Most openEO use cases are just about building a single result data cube, +which is readily covered in the openEO Python client library through classes like +DataCube and VectorCube. +It is straightforward to create a batch job from these, or execute/download them synchronously.

+

The openEO API also allows multiple result nodes in a single process graph, +for example to persist intermediate results or produce results in different output formats. +To support this, the openEO Python client library provides the MultiResult class, +which allows grouping multiple DataCube and VectorCube objects +in a single entity that can be used to create or run batch jobs. For example:

+
from openeo import MultiResult
+
+cube1 = ...
+cube2 = ...
+multi_result = MultiResult([cube1, cube2])
+job = multi_result.create_job()
+
+
+

Moreover, it is not necessary to explicitly create such a +MultiResult object, +as the Connection.create_job() method +directly supports passing multiple data cube objects in a list, +which will be automatically grouped as a multi-result:

+
cube1 = ...
+cube2 = ...
+job = connection.create_job([cube1, cube2])
+
+
+
+

Important

+

Only a single Connection can be in play +when grouping multiple results like this. +As everything is to be merged in a single process graph +to be sent to a single backend, +it is not possible to mix cubes created from different connections.

+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/development.html b/development.html new file mode 100644 index 000000000..592dacb3c --- /dev/null +++ b/development.html @@ -0,0 +1,521 @@ + + + + + + + + Development and maintenance — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Development and maintenance

+

For development on the openeo package itself, +it is recommended to install a local git checkout of the project +in development mode (-e) +with additional development related dependencies ([dev]) +like this:

+
pip install -e .[dev]
+
+
+

If you are on Windows and experience problems installing this way, you can find some solutions in section Development Installation on Windows.

+
+

Running the unit tests

+

The test suite of the openEO Python Client leverages +the nice pytest framework. +It is installed automatically when installing the openEO Python Client +with the [dev] extra as shown above. +Running the whole test suite is as simple as executing:

+
pytest
+
+
+

There are a ton of command line options for fine-tuning +(e.g. select a subset of tests, how results should be reported, …). +Run pytest -h for a quick overview +or check the pytest documentation for more information.

+

For example:

+
# Skip tests that are marked as slow
+pytest -m "not slow"
+
+
+
+
+

Building the documentation

+

Building the documentation requires Sphinx +and some plugins +(which are installed automatically as part of the [dev] install).

+
+

Quick and easy

+

The easiest way to build the documentation is working from the docs folder +and using the Makefile:

+
# From `docs` folder
+make html
+
+
+

(assumes you have make available, if not: use python -msphinx -M html .  _build.)

+

This will generate the docs in HTML format under docs/_build/html/. +Open the HTML files manually, +or use Python’s built-in web server to host them locally, e.g.:

+
# From `docs` folder
+python -m http.server 8000
+
+
+

Then, visit http://127.0.0.1:8000/_build/html/ in your browser

+
+
+

Like a Pro

+

When doing larger documentation work, it can be tedious to manually rebuild the docs +and refresh your browser to check the result. +Instead, use sphinx-autobuild +to automatically rebuild on documentation changes and live-reload it in your browser. +After installation (pip install sphinx-autobuild in your development environment), +just run

+
# From project root
+sphinx-autobuild docs/ --watch openeo/ docs/_build/html/
+
+
+

and then visit http://127.0.0.1:8000 . +When you change (and save) documentation source files, your browser should now +automatically refresh and show the newly built docs. Just like magic.

+
+
+
+

Contributing code

+

User contributions (such as bug fixes and new features, both in source code and documentation) +are greatly appreciated and welcome.

+
+

Pull requests

+

We use a traditional GitHub Pull Request (PR) workflow +for user contributions, which roughly follows these steps:

+
    +
  • Create a personal fork of https://github.com/Open-EO/openeo-python-client +(unless you already have push permissions to an existing fork or the original repo)

  • +
  • Preferably: work on your contribution in a new feature branch

  • +
  • Push your feature branch to your fork and create a pull request

  • +
  • The pull request is the place for review, discussion and fine-tuning of your work

  • +
  • Once your pull request is in good shape it will be merged by a maintainer

  • +
+
+
+

Pre-commit for basic code quality checks

+

We started using the pre-commit tool +for basic fine-tuning of code style and quality in new contributions. +It’s currently not enforced, but enabling pre-commit is recommended and appreciated +when contributing code.

+
+

Note

+

Note that the whole repository does not fully follow all code style rules at the moment. +We’re just gradually introducing it, piggybacking on new contributions and commits.

+
+
+

Pre-commit set up

+
    +
  • Install the general pre-commit command line tool:

    +
      +
    • The simplest option is to install it directly in the virtual environment +you are using for openEO Python client development (e.g. pip install pre-commit).

    • +
    • You can also install it globally on your system +(e.g. using pipx, +uv tool, +conda, homebrew, …) +so you can use it across different projects.

    • +
    +
  • +
  • Install the project specific git hook scripts by running this in the root of your local git clone:

    +
    pre-commit install
    +
    +
    +

    This will automatically install additional scripts and tools in a sandbox +to run the various checks defined in the project’s .pre-commit-config.yaml configuration file.

    +
  • +
+
+
+

Pre-commit usage

+

When you commit new changes, the freshly installed pre-commit hook +will now automatically run each of the configured linters/formatters/… +Some of these just flag issues (e.g. invalid JSON files) +while others even automatically fix problems (e.g. clean up excessive whitespace).

+

If there is some kind of violation, the commit will be blocked. +Address these problems and try to commit again.

+
+

Attention

+

Some pre-commit tools directly edit your files (e.g. formatting tweaks) +instead of just flagging issues. +This might feel intrusive at first, but once you get the hang of it, +it should allow you to streamline your workflow.

+

In particular, it is recommended to use the staging feature of git to prepare your commit. +Pre-commit’s proposed changes are not staged automatically, +so you can more easily keep them separate and review.

+
+
+

Tip

+

You can temporarily disable pre-commit for those rare cases +where you intentionally want to commit code that violates the code style, +e.g. through the git commit command line option -n/--no-verify.

+
+
+
+
+
+

Creating a release

+

This section describes the procedure to create +properly versioned releases of the openeo package +that can be downloaded by end users (e.g. through pip from pypi.org) +and depended on by other projects.

+

The releases will end up on:

+ +
+

Prerequisites

+
    +
  • You have permissions to push branches and tags and maintain releases on +the openeo-python-client project on GitHub.

  • +
  • You have permissions to upload releases to the +openeo project on pypi.org

  • +
  • The Python virtual environment you work in has the latest version +of the twine package installed. +If you plan to build the wheel yourself (instead of letting GitHub or Jenkins do this), +you also need recent enough versions of the setuptools and wheel packages.

  • +
+
+
+

Important files

+
+
setup.py

describes the metadata of the package, +like package name openeo and version +(which is extracted from openeo/_version.py).

+
+
openeo/_version.py

defines the version of the package. +During general development, this version string should contain +a pre-release +segment (e.g. a1 for alpha releases, b1 for beta releases, etc) +to avoid collision with final releases. For example:

+
__version__ = '0.8.0a1'
+
+
+

As discussed below, this pre-release suffix should +only be removed during the release procedure +and restored when bumping the version after the release procedure.

+
+
CHANGELOG.md

keeps track of important changes associated with each release. +It follows the Keep a Changelog convention +and should be properly updated with each bug fix, feature addition/removal, … +under the Unreleased section during development.

+
+
+
+
+

Procedure

+

These are the steps to create and publish a new release of the openeo package. +To avoid the confusion with ad-hoc injection of some abstract version placeholder +that has to be replaced properly, +we will use a concrete version 0.8.0 in the examples below.

+
    +
  1. Make sure you are working on the latest master branch, +without uncommitted changes, and that all tests are properly passing.

  2. +
  3. Create release commit:

    +
      +
    1. Drop the pre-release suffix from the version string in openeo/_version.py +so that it is just a “final” semantic versioning string, e.g. 0.8.0

    2. +
    3. Update CHANGELOG.md: rename the “Unreleased” section title +to contain version and date, e.g.:

      +
      ## [0.8.0] - 2020-12-15
      +
      +
      +

      remove empty subsections +and start a new “Unreleased” section above it, like:

      +
      ## [Unreleased]
      +
      +### Added
      +
      +### Changed
      +
      +### Removed
      +
      +### Fixed
      +
      +
      +
    4. +
    5. Commit these changes in git with a commit message like Release 0.8.0 +and push to GitHub:

      +
      git add openeo/_version.py CHANGELOG.md
      +git commit -m 'Release 0.8.0'
      +git push origin master
      +
      +
      +
    6. +
    +
  4. +
  5. Optional, but recommended: wait for VITO Jenkins to build this updated master +(trigger it manually if necessary), +so that a build of a final, non-alpha release 0.8.0 +is properly uploaded to VITO artifactory.

  6. +
  7. Create release on PyPI:

    +
      +
    1. Obtain a wheel archive of the package, with one of these approaches:

      +
        +
      • Preferably, the path of least surprise: build wheel through GitHub Actions. +Go to workflow “Build wheel”, +manually trigger a build with “Run workflow” button, wait for it to finish successfully, +download generated artifact.zip, and finally: unzip it to obtain openeo-0.8.0-py3-none-any.whl

      • +
      • Or, if you know what you are doing and you’re sure you have a clean +local checkout, you can also build it locally:

        +
        python setup.py bdist_wheel
        +
        +
        +

        This should create dist/openeo-0.8.0-py3-none-any.whl

        +
      • +
      +
    2. +
    3. Upload this wheel to openeo project on PyPI:

      +
      python -m twine upload openeo-0.8.0-py3-none-any.whl
      +
      +
      +

      Check the release history on PyPI +to verify the twine upload. +Another way to verify that the freshly created release installs +is using docker to do a quick install-and-burn, +for example as follows (check the installed version in pip’s output):

      +
      docker run --rm -it python python -m pip install --no-deps openeo
      +
      +
      +
    4. +
    +
  8. +
  9. Create a git version tag and push it to GitHub:

    +
    git tag v0.8.0
    +git push origin v0.8.0
    +
    +
    +
  10. +
  11. Create a release in GitHub: +go to https://github.com/Open-EO/openeo-python-client/releases/new, +enter v0.8.0 under “tag”, +enter title: openEO Python Client v0.8.0, +use the corresponding CHANGELOG.md section as description +and publish it +(no need to attach binaries).

  12. +
  13. Bump the version in openeo/_version.py (usually the “minor” level) +and append a pre-release “a1” suffix again, for example:

    +
    __version__ = '0.9.0a1'
    +
    +
    +

    Commit this (e.g. with message _version.py: bump to 0.9.0a1) +and push to GitHub.

    +
  14. +
  15. Update conda-forge package too +(requires conda recipe maintainer role). +Normally, the “regro-cf-autotick-bot” will create a pull request. +If it builds fine, merge it. +If not, fix the issue +(typically in recipe/meta.yaml) +and merge.

  16. +
  17. Optionally: make a post about the new release +on the openEO Platform Forum +or the CDSE Forum.

  18. +
+
+

Verification

+

The new release should now be available/listed at:

+ +

Here is a bash (subshell) one-liner to verify that the PyPI release works properly:

+
(
+    cd /tmp &&\
+    python -m venv venv-openeo &&\
+    source venv-openeo/bin/activate &&\
+    pip install -U openeo &&\
+    python -c "import openeo;print(openeo);print(openeo.__version__)"
+)
+
+
+

It tries to install the latest version of the openeo package in a temporary virtual env, +import it and print the package version.

+
+
+
+
+

Development Installation on Windows

+

Normally you can install the client the same way on Windows as on Linux, like so:

+
pip install -e .[dev]
+
+
+
+

Alternative development installation

+

The standard pure-pip based installation should work with the most recent code. +However, in the past we sometimes had issues with this procedure. +Should you experience problems, consider using an alternative conda-based installation procedure:

+
    +
  1. Create and activate a new conda environment for developing the openeo-python-client. +For example:

    +
    conda create -n openeopyclient
    +conda activate openeopyclient
    +
    +
    +
  2. +
  3. In that conda environment, install only the dependencies of openeo via conda, +but not the openeo package itself.

    +
    # Install openeo dependencies (from the conda-forge channel)
    +conda install --only-deps -c conda-forge openeo
    +
    +
    +
  4. +
  5. Do a pip install from the project root in editable mode (pip -e):

    +
    pip install -e .[dev]
    +
    +
    +
  6. +
+
+
+
+

Update of generated files

+

Some parts of the openEO Python Client Library source code are +generated/compiled from upstream sources (e.g. official openEO specifications). +Because updates are not often required, +it’s just a semi-manual procedure (to run from the project root):

+
# Update the sub-repositories (like git submodules, but optional)
+python specs/update-subrepos.py
+
+# Update `openeo/processes.py` from specifications in openeo-processes repository
+python openeo/internal/processes/generator.py  specs/openeo-processes specs/openeo-processes/proposals --output openeo/processes.py
+
+# Update the openEO process mapping documentation page
+python docs/process_mapping.py > docs/process_mapping.rst
+
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/genindex.html b/genindex.html new file mode 100644 index 000000000..cddbd457e --- /dev/null +++ b/genindex.html @@ -0,0 +1,1794 @@ + + + + + + + Index — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ + +

Index

+ +
+ _ + | A + | B + | C + | D + | E + | F + | G + | H + | I + | J + | L + | M + | N + | O + | P + | Q + | R + | S + | T + | U + | V + | W + | X + +
+

_

+ + + +
+ +

A

+ + + +
+ +

B

+ + + +
+ +

C

+ + + +
+ +

D

+ + + +
+ +

E

+ + + +
+ +

F

+ + + +
+ +

G

+ + + +
+ +

H

+ + +
+ +

I

+ + + +
+ +

J

+ + + +
+ +

L

+ + + +
+ +

M

+ + + +
+ +

N

+ + + +
+ +

O

+ + + +
+ +

P

+ + + +
+ +

Q

+ + +
+ +

R

+ + + +
+ +

S

+ + + +
+ +

T

+ + + +
+ +

U

+ + + +
+ +

V

+ + + +
+ +

W

+ + +
+ +

X

+ + + +
+ + + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/index.html b/index.html new file mode 100644 index 000000000..96d479527 --- /dev/null +++ b/index.html @@ -0,0 +1,380 @@ + + + + + + + + openEO Python Client — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

openEO Python Client

+https://img.shields.io/badge/Status-Stable-yellow.svg +

Welcome to the documentation of openeo, +the official Python client library for interacting with openEO back-ends +to process remote sensing and Earth observation data. +It provides a Pythonic interface for the openEO API, +supporting data/process discovery, process graph building, +batch job management and much more.

+
+

Usage example

+

A simple example, to give a feel of using this library:

+
import openeo
+
+# Connect to openEO back-end.
+connection = openeo.connect("openeo.vito.be").authenticate_oidc()
+
+# Load data cube from TERRASCOPE_S2_NDVI_V2 collection.
+cube = connection.load_collection(
+    "TERRASCOPE_S2_NDVI_V2",
+    spatial_extent={"west": 5.05, "south": 51.21, "east": 5.1, "north": 51.23},
+    temporal_extent=["2022-05-01", "2022-05-30"],
+    bands=["NDVI_10M"],
+)
+# Rescale digital number to physical values and take temporal maximum.
+cube = cube.apply(lambda x: 0.004 * x - 0.08).max_time()
+
+cube.download("ndvi-max.tiff")
+
+
+_images/welcome.png +
+
+

Table of contents

+
+ +
+
+
+

Indices and tables

+ +
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/installation.html b/installation.html new file mode 100644 index 000000000..d9ac5294f --- /dev/null +++ b/installation.html @@ -0,0 +1,230 @@ + + + + + + + + Installation — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Installation

+

It is an explicit goal of the openEO Python client library to be as easy to install as possible, +unlocking the openEO ecosystem to a broad audience. +The package is a pure Python implementation and its dependencies are carefully considered (in number and complexity).

+
+

Basic install

+

It is recommended to work in some kind of virtual environment (venv, conda, …) +to avoid polluting the base install of Python on your operating system +or introducing conflicts with other applications. +How you organize your virtual environments heavily depends on your use case and workflow, +and is out of scope of this documentation.

+
+

Installation with pip

+

The openEO Python client library is available from PyPI +and can be easily installed with a tool like pip, for example:

+
$ pip install openeo
+
+
+

To upgrade the package to the latest release:

+
$ pip install --upgrade openeo
+
+
+
+
+

Installation with Conda

+

The openEO Python client library is available on conda-forge +and can be easily installed in a conda environment, for example:

+
$ conda install -c conda-forge openeo
+
+
+
+
+

Verifying and troubleshooting

+

You can check if the installation worked properly +by trying to import the openeo package in a Python script, interactive shell or notebook:

+
import openeo
+
+print(openeo.client_version())
+
+
+

This should print the installed version of the openeo package.

+

If the first line gives an error like ModuleNotFoundError: No module named 'openeo', +some troubleshooting tips:

+
    +
  • Restart your Python shell or notebook (or start a fresh one).

  • +
  • Double check that the installation went well, +e.g. try re-installing and keep an eye out for error/warning messages.

  • +
  • Make sure that you are working in the same (virtual) environment you installed the package in.

  • +
+

If you still have trouble installing and importing openeo, +feel free to reach out in the community forum +or the project’s issue tracker. +Try to describe your setup in enough detail: your operating system, +which virtual environment system you use, +the installation tool (pip, conda or something else), …

+
+
+
+

Optional dependencies

+

Depending on your use case, you might also want to install some additional libraries. +For example:

+
    +
  • netCDF4 or h5netcdf for loading and writing NetCDF files (e.g. integrated in xarray.load_dataset())

  • +
  • matplotlib for visualisation (e.g. integrated plot functionality in xarray)

  • +
  • pyarrow for (read/write) support of Parquet files +(e.g. with MultiBackendJobManager)

  • +
  • rioxarray for GeoTIFF support in the assert helpers from openeo.testing.results

  • +
  • geopandas for working with dataframes with geospatial support +(e.g. with MultiBackendJobManager)

  • +
+
+

Enabling additional features

+

To use the on-demand preview feature and other Jupyter-enabled features, you need to install the necessary dependencies.

+
$ pip install openeo[jupyter]
+
+
+
+
+
+

Source or development install

+

If you closely track the development of the openeo package at +github.com/Open-EO/openeo-python-client +and want to work with unreleased features or contribute to the development of the package, +you can install it as follows from the root of a git source checkout:

+
$ pip install -e .[dev]
+
+
+

The -e option enables “development mode”, which makes sure that changes you make to the source code +happen directly on the installed package, so that you don’t have to re-install the package each time +you make a change.

+

The [dev] (a so-called “extra”) installs additional development related dependencies, +for example to run the unit tests.

+

You can also find more information about installation for development on the Development and maintenance page.

+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/lib/openeo/__init__.py b/lib/openeo/__init__.py new file mode 100644 index 000000000..782843f8f --- /dev/null +++ b/lib/openeo/__init__.py @@ -0,0 +1,29 @@ +""" + + +""" + +__title__ = 'openeo' +__author__ = 'Jeroen Dries' + + +class BaseOpenEoException(Exception): + pass + + +import importlib.metadata + +from openeo._version import __version__ +from openeo.rest.connection import Connection, connect, session +from openeo.rest.datacube import UDF, DataCube +from openeo.rest.graph_building import collection_property +from openeo.rest.job import BatchJob, RESTJob +from openeo.rest.multiresult import MultiResult +from openeo.rest.vectorcube import VectorCube + + +def client_version() -> str: + try: + return importlib.metadata.version("openeo") + except importlib.metadata.PackageNotFoundError: + return __version__ diff --git a/lib/openeo/_version.py b/lib/openeo/_version.py new file mode 100644 index 000000000..d9f2629e2 --- /dev/null +++ b/lib/openeo/_version.py @@ -0,0 +1 @@ +__version__ = "0.36.0" diff --git a/lib/openeo/api/__init__.py b/lib/openeo/api/__init__.py new file mode 100644 index 000000000..88cc8b8b5 --- /dev/null +++ b/lib/openeo/api/__init__.py @@ -0,0 +1,3 @@ +""" +Wrappers for openEO API concepts. 
+""" diff --git a/lib/openeo/api/logs.py b/lib/openeo/api/logs.py new file mode 100644 index 000000000..5a7ae02d5 --- /dev/null +++ b/lib/openeo/api/logs.py @@ -0,0 +1,99 @@ +import logging +from typing import Optional, Union + + +class LogEntry(dict): + """ + Log message and info for jobs and services + + Fields: + - ``id``: Unique ID for the log, string, REQUIRED + - ``code``: Error code, string, optional + - ``level``: Severity level, string (error, warning, info or debug), REQUIRED + - ``message``: Error message, string, REQUIRED + - ``time``: Date and time of the error event as RFC3339 date-time, string, available since API 1.1.0 + - ``path``: A "stack trace" for the process, array of dicts + - ``links``: Related links, array of dicts + - ``usage``: Usage metrics available as property 'usage', dict, available since API 1.1.0 + May contain the following metrics: cpu, memory, duration, network, disk, storage and other custom ones + Each of the metrics is also a dict with the following parts: value (numeric) and unit (string) + - ``data``: Arbitrary data the user wants to "log" for debugging purposes. + Please note that this property may not exist as there's a difference + between None and non-existing. None for example refers to no-data in + many cases while the absence of the property means that the user did + not provide any data for debugging. + """ + + _required = {"id", "level", "message"} + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # Check required fields + missing = self._required.difference(self.keys()) + if missing: + raise ValueError("Missing required fields: {m}".format(m=sorted(missing))) + + @property + def id(self): + return self["id"] + + # Legacy alias + log_id = id + + @property + def message(self): + return self["message"] + + @property + def level(self): + return self["level"] + + # TODO: add properties for "code", "time", "path", "links" and "data" with sensible defaults? 
+ + +def normalize_log_level( + log_level: Union[int, str, None], default: int = logging.DEBUG +) -> int: + """ + Helper function to convert a openEO API log level (e.g. string "error") + to the integer constants defined in Python's standard library ``logging`` module (e.g. ``logging.ERROR``). + + :param log_level: log level to normalize: a log level string in the style of + the openEO API ("error", "warning", "info", or "debug"), + an integer value (e.g. a ``logging`` constant), or ``None``. + + :param default: fallback log level to return on unknown log level strings or ``None`` input. + + :raises TypeError: when log_level is any other type than str, an int or None. + :return: One of the following log level constants from the standard module ``logging``: + ``logging.ERROR``, ``logging.WARNING``, ``logging.INFO``, or ``logging.DEBUG`` . + """ + if isinstance(log_level, str): + log_level = log_level.upper() + if log_level in ["CRITICAL", "ERROR", "FATAL"]: + return logging.ERROR + elif log_level in ["WARNING", "WARN"]: + return logging.WARNING + elif log_level == "INFO": + return logging.INFO + elif log_level == "DEBUG": + return logging.DEBUG + else: + return default + elif isinstance(log_level, int): + return log_level + elif log_level is None: + return default + else: + raise TypeError( + f"Value for log_level is not an int or str: type={type(log_level)}, value={log_level!r}" + ) + + +def log_level_name(log_level: Union[int, str, None]) -> str: + """ + Get the name of a normalized log level. + This value conforms to log level names used in the openEO API. 
+ """ + return logging.getLevelName(normalize_log_level(log_level)).lower() diff --git a/lib/openeo/api/process.py b/lib/openeo/api/process.py new file mode 100644 index 000000000..1e2d840ae --- /dev/null +++ b/lib/openeo/api/process.py @@ -0,0 +1,479 @@ +from __future__ import annotations + +import textwrap +import warnings +from typing import List, Optional, Union + + +class Parameter: + """ + A (process) parameter to build parameterized + :ref:`user-defined processes`. + + Parameter objects can be :ref:`defined ` + with at least a name and expected schema + (e.g. is the parameter a placeholder for a string, a bounding box, a date, ...) + and can then be :ref:`used ` + with various functions and classes, + like :py:class:`~openeo.rest.datacube.DataCube`, + to build parameterized user-defined processes. + + Apart from the generic :py:class:`Parameter` constructor, + this class also provides various helpers (class methods) + to easily create parameters for common parameter types. + + :param name: parameter name, which will be used to assign concrete values to. + It is recommended to stick to the convention of snake case naming (using lowercase with underscores). + :param description: human-readable description of the parameter. + :param schema: JSON schema describing the expected data type and structure of the parameter. + :param default: default value for the parameter when it's optional. + :param optional: toggle to indicate whether the parameter is optional or required. + """ + # TODO unify with openeo.internal.processes.parse.Parameter? + __slots__ = ("name", "description", "schema", "default", "optional") + + _DEFAULT_UNDEFINED = object() + + def __init__( + self, + name: str, + description: Optional[str] = None, + schema: Union[list, dict, str, None] = None, + default=_DEFAULT_UNDEFINED, + optional: Optional[bool] = None, + ): + self.name = name + if description is None: + # Description is required in openEO API, we are a bit more permissive here. 
+ warnings.warn("Parameter without description: using name as description.") + description = name + self.description = description + self.schema = {"type": schema} if isinstance(schema, str) else (schema or {}) + # TODO: automatically set `optional` when `default` is set? + self.default = default + self.optional = optional + + def to_dict(self) -> dict: + """ + Convert to dictionary for JSON-serialization. + """ + d = {"name": self.name, "description": self.description, "schema": self.schema} + if self.optional is not None: + d["optional"] = self.optional + if self.default is not self._DEFAULT_UNDEFINED: + d["default"] = self.default + d["optional"] = True + return d + + @classmethod + def raster_cube(cls, name: str = "data", description: str = "A data cube.", **kwargs) -> Parameter: + """ + Helper to easily create a 'raster-cube' parameter. + + :param name: parameter name, which will be used to assign concrete values to. + It is recommended to stick to the convention of snake case naming (using lowercase with underscores). + :param description: human-readable description of the parameter. + + See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``). + """ + schema = {"type": "object", "subtype": "raster-cube"} + return cls(name=name, description=description, schema=schema, **kwargs) + + @classmethod + def datacube(cls, name: str = "data", description: str = "A data cube.", **kwargs) -> Parameter: + """ + Helper to easily create a 'datacube' parameter. + + :param name: parameter name, which will be used to assign concrete values to. + It is recommended to stick to the convention of snake case naming (using lowercase with underscores). + :param description: human-readable description of the parameter. + + See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``). + + .. 
versionadded:: 0.22.0 + """ + schema = {"type": "object", "subtype": "datacube"} + return cls(name=name, description=description, schema=schema, **kwargs) + + @classmethod + def string( + cls, + name: str, + description: Optional[str] = None, + *, + values: Optional[List[str]] = None, + subtype: Optional[str] = None, + format: Optional[str] = None, + **kwargs, + ) -> Parameter: + """ + Helper to easily create a 'string' parameter. + + :param name: parameter name, which will be used to assign concrete values to. + It is recommended to stick to the convention of snake case naming (using lowercase with underscores). + :param description: human-readable description of the parameter. + :param values: Optional list of allowed string values to make this an "enum". + :param subtype: Optional subtype of the 'string' schema. + :param format: Optional format of the 'string' schema. + + See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``). + """ + schema = {"type": "string"} + if values is not None: + schema["enum"] = values + if subtype: + schema["subtype"] = subtype + if format: + schema["format"] = format + return cls(name=name, description=description, schema=schema, **kwargs) + + @classmethod + def integer(cls, name: str, description: Optional[str] = None, **kwargs) -> Parameter: + """ + Helper to create an 'integer' parameter. + + :param name: parameter name, which will be used to assign concrete values to. + It is recommended to stick to the convention of snake case naming (using lowercase with underscores). + :param description: human-readable description of the parameter. + + See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``). 
+ """ + return cls(name=name, description=description, schema={"type": "integer"}, **kwargs) + + @classmethod + def number(cls, name: str, description: Optional[str] = None, **kwargs) -> Parameter: + """ + Helper to easily create a 'number' parameter. + + :param name: parameter name, which will be used to assign concrete values to. + It is recommended to stick to the convention of snake case naming (using lowercase with underscores). + :param description: human-readable description of the parameter. + + See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``). + """ + return cls(name=name, description=description, schema={"type": "number"}, **kwargs) + + @classmethod + def boolean(cls, name: str, description: Optional[str] = None, **kwargs) -> Parameter: + """ + Helper to easily create a 'boolean' parameter. + + :param name: parameter name, which will be used to assign concrete values to. + It is recommended to stick to the convention of snake case naming (using lowercase with underscores). + :param description: human-readable description of the parameter. + + See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``). + """ + return cls(name=name, description=description, schema={"type": "boolean"}, **kwargs) + + @classmethod + def array( + cls, + name: str, + description: Optional[str] = None, + *, + item_schema: Optional[Union[str, dict]] = None, + **kwargs, + ) -> Parameter: + """ + Helper to easily create parameter with an 'array' schema. + + :param name: parameter name, which will be used to assign concrete values to. + It is recommended to stick to the convention of snake case naming (using lowercase with underscores). + :param description: human-readable description of the parameter. + :param item_schema: Schema of the array items given in JSON Schema style, e.g. ``{"type": "string"}``. + Simple schemas can also be specified as single string: + e.g. 
``"string"`` will be expanded to ``{"type": "string"}``. + + See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``). + + .. versionchanged:: 0.23.0 + Added ``item_schema`` argument. + """ + schema = {"type": "array"} + if item_schema: + if isinstance(item_schema, str): + item_schema = {"type": item_schema} + schema["items"] = item_schema + return cls(name=name, description=description, schema=schema, **kwargs) + + @classmethod + def object( + cls, name: str, description: Optional[str] = None, *, subtype: Optional[str] = None, **kwargs + ) -> Parameter: + """ + Helper to create an 'object' type parameter + + :param name: parameter name, which will be used to assign concrete values to. + It is recommended to stick to the convention of snake case naming (using lowercase with underscores). + :param description: human-readable description of the parameter. + :param subtype: subtype of the 'object' schema + + See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``). + + .. versionadded:: 0.26.0 + """ + schema = {"type": "object"} + if subtype: + schema["subtype"] = subtype + return cls(name=name, description=description, schema=schema, **kwargs) + + @classmethod + def bounding_box( + cls, + name: str, + description: str = "Spatial extent specified as a bounding box with 'west', 'south', 'east' and 'north' fields.", + **kwargs, + ) -> Parameter: + """ + Helper to easily create a 'bounding box' parameter, which allows to specify a spatial extent + with "west", "south", "east" and "north" bounds (and optionally a CRS identifier). + + :param name: parameter name, which will be used to assign concrete values to. + It is recommended to stick to the convention of snake case naming (using lowercase with underscores). + :param description: human-readable description of the parameter. 
+ + See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``). + + .. versionadded:: 0.30.0 + """ + schema = { + "type": "object", + "subtype": "bounding-box", + "required": ["west", "south", "east", "north"], + "properties": { + "west": { + "type": "number", + "description": "West (lower left corner, coordinate axis 1).", + }, + "south": { + "type": "number", + "description": "South (lower left corner, coordinate axis 2).", + }, + "east": { + "type": "number", + "description": "East (upper right corner, coordinate axis 1).", + }, + "north": { + "type": "number", + "description": "North (upper right corner, coordinate axis 2).", + }, + "crs": { + "description": "Coordinate reference system of the extent, specified as as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system.", + "anyOf": [ + { + "type": "integer", + "subtype": "epsg-code", + "title": "EPSG Code", + "minimum": 1000, + }, + { + "type": "string", + "subtype": "wkt2-definition", + "title": "WKT2 definition", + }, + ], + "default": 4326, + }, + # TODO: support base and height? + }, + } + return cls(name=name, description=description, schema=schema, **kwargs) + + @classmethod + def spatial_extent( + cls, + name: str = "spatial_extent", + description: Optional[str] = None, + **kwargs, + ) -> Parameter: + """ + Helper to easily create a 'spatial_extent' parameter, which is compatible with the ``load_collection`` argument of + the same name. This allows to conveniently create user-defined processes that can be applied to a bounding box and vector data + for spatial filtering. It is also possible for users to set to null, and define spatial filtering using other processes. + + :param name: parameter name, which will be used to assign concrete values to. 
+ It is recommended to stick to the convention of snake case naming (using lowercase with underscores). + :param description: human-readable description of the parameter. + + See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``). + + .. versionadded:: 0.32.0 + """ + if description is None: + description = textwrap.dedent( + """ + Limits the data to process to the specified bounding box or polygons. + + For raster data, the process loads the pixel into the data cube if the point + at the pixel center intersects with the bounding box or any of the polygons + (as defined in the Simple Features standard by the OGC). + + For vector data, the process loads the geometry into the data cube if the geometry + is fully within the bounding box or any of the polygons (as defined in the + Simple Features standard by the OGC). Empty geometries may only be in the + data cube if no spatial extent has been provided. + + Empty geometries are ignored. + + Set this parameter to null to set no limit for the spatial extent. 
+ """ + ).strip() + + schema = [ + { + "title": "Bounding Box", + "type": "object", + "subtype": "bounding-box", + "required": ["west", "south", "east", "north"], + "properties": { + "west": {"description": "West (lower left corner, coordinate axis 1).", "type": "number"}, + "south": {"description": "South (lower left corner, coordinate axis 2).", "type": "number"}, + "east": {"description": "East (upper right corner, coordinate axis 1).", "type": "number"}, + "north": {"description": "North (upper right corner, coordinate axis 2).", "type": "number"}, + "base": { + "description": "Base (optional, lower left corner, coordinate axis 3).", + "type": ["number", "null"], + "default": None, + }, + "height": { + "description": "Height (optional, upper right corner, coordinate axis 3).", + "type": ["number", "null"], + "default": None, + }, + "crs": { + "description": "Coordinate reference system of the extent, specified as as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system.", + "anyOf": [ + { + "title": "EPSG Code", + "type": "integer", + "subtype": "epsg-code", + "minimum": 1000, + "examples": [3857], + }, + {"title": "WKT2", "type": "string", "subtype": "wkt2-definition"}, + ], + "default": 4326, + }, + }, + }, + { + "title": "Vector data cube", + "description": "Limits the data cube to the bounding box of the given geometries in the vector data cube. For raster data, all pixels inside the bounding box that do not intersect with any of the polygons will be set to no data (`null`). Empty geometries are ignored.", + "type": "object", + "subtype": "datacube", + "dimensions": [{"type": "geometry"}], + }, + { + "title": "No filter", + "description": "Don't filter spatially. 
All data is included in the data cube.", + "type": "null", + }, + ] + return cls(name=name, description=description, schema=schema, **kwargs) + + @classmethod + def date(cls, name: str, description: str = "A date.", **kwargs) -> Parameter: + """ + Helper to easily create a 'date' parameter. + + :param name: parameter name, which will be used to assign concrete values to. + It is recommended to stick to the convention of snake case naming (using lowercase with underscores). + :param description: human-readable description of the parameter. + + See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``). + + .. versionadded:: 0.30.0 + """ + schema = {"type": "string", "subtype": "date", "format": "date"} + return cls(name=name, description=description, schema=schema, **kwargs) + + @classmethod + def date_time(cls, name: str, description: str = "A date with time.", **kwargs) -> Parameter: + """ + Helper to easily create a 'date-time' parameter. + + :param name: parameter name, which will be used to assign concrete values to. + It is recommended to stick to the convention of snake case naming (using lowercase with underscores). + :param description: human-readable description of the parameter. + + See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``). + + .. versionadded:: 0.30.0 + """ + schema = {"type": "string", "subtype": "date-time", "format": "date-time"} + return cls(name=name, description=description, schema=schema, **kwargs) + + @classmethod + def geojson(cls, name: str, description: str = "Geometries specified as GeoJSON object.", **kwargs) -> Parameter: + """ + Helper to easily create a 'geojson' parameter, which allows to specify geometries as an inline GeoJSON object. + + :param name: parameter name, which will be used to assign concrete values to. + It is recommended to stick to the convention of snake case naming (using lowercase with underscores). 
+ :param description: human-readable description of the parameter. + + See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``). + + .. versionadded:: 0.30.0 + """ + schema = {"type": "object", "subtype": "geojson"} + return cls(name=name, description=description, schema=schema, **kwargs) + + @classmethod + def temporal_interval( + cls, + name: str = "temporal_extent", + description: str = "Temporal extent specified as two-element array with start and end date/date-time.", + **kwargs, + ) -> Parameter: + """ + Helper to easily create a 'temporal-interval' parameter, which allows to specify a temporal extent + as a two-element array with start and end date/date-time. + + :param name: parameter name, which will be used to assign concrete values to. + It is recommended to stick to the convention of snake case naming (using lowercase with underscores). + :param description: human-readable description of the parameter. + + See the generic :py:class:`Parameter` constructor for information on additional arguments (except ``schema``). + + .. 
def schema_supports(schema: Union[dict, List[dict]], type: str, subtype: Optional[str] = None) -> bool:
    """
    Check whether a parameter schema supports the given type (and, optionally, subtype).

    :param schema: a single schema dict, or a list of alternative schema dicts.
    :param type: type name to look for (matched against the schema's "type" field,
        which can be a string or a list of strings).
    :param subtype: optional subtype that must match the schema's "subtype" field.
    :return: True if the schema (or any of the alternatives in a list) matches.
    :raises ValueError: if ``schema`` is neither dict nor list,
        or if a schema's "type" field is neither a string nor a list.
    """
    # TODO: support checking item type in arrays
    if isinstance(schema, list):
        # List of alternatives: the first matching one is enough.
        for candidate in schema:
            if schema_supports(candidate, type=type, subtype=subtype):
                return True
        return False
    if not isinstance(schema, dict):
        raise ValueError(schema)

    declared = schema.get("type")
    if isinstance(declared, str):
        type_matches = declared == type
    elif isinstance(declared, list):
        type_matches = type in declared
    else:
        raise ValueError(declared)

    if not type_matches:
        return False
    # Subtype is only checked when one was requested.
    return not subtype or schema.get("subtype") == subtype
class ComparableVersion:
    """
    Helper to compare a version (e.g. API version) against another (threshold) version

    >>> v = ComparableVersion('1.2.3')
    >>> v.at_least('1.2.1')
    True
    >>> v.at_least('1.10.2')
    False
    >>> v > "2.0"
    False

    To express a threshold condition you sometimes want the reference or threshold value on
    the left hand side or right hand side of the logical expression.
    There are two groups of methods to handle each case:

    - right hand side referencing methods. These read more intuitively. For example:

        `a.at_least(b)`: a is equal or higher than b
        `a.below(b)`: a is lower than b

    - left hand side referencing methods. These allow "currying" a threshold value
      in a reusable condition callable. For example:

        `a.or_higher(b)`: b is equal or higher than a
        `a.accept_lower(b)`: b is lower than a

    Versions of different length are padded before comparison, so ``"1.2"`` equals ``"1.2.0"``.
    Hashing is consistent with that: equal versions have equal hashes.

    Equality against an object that is not a version representation (string, tuple
    or :py:class:`ComparableVersion`) is simply ``False``.
    The ordering operators raise ``ValueError`` for such objects.

    Implementation is loosely based on (now deprecated) `distutils.version.LooseVersion`,
    which pragmatically parses version strings as a sequence of numbers (compared numerically)
    or alphabetic strings (compared lexically), e.g.: 1.5.1, 1.5.2b2, 161, 8.02, 2g6, 2.2beta29.
    """

    _component_re = re.compile(r'(\d+ | [a-zA-Z]+ | \.)', re.VERBOSE)

    def __init__(self, version: Union[str, "ComparableVersion", tuple]):
        """
        :param version: version string (e.g. "1.2.3"), tuple of version components,
            or another :py:class:`ComparableVersion` (which is copied).
        :raises ValueError: when ``version`` is of an unsupported type.
        """
        if isinstance(version, ComparableVersion):
            self._version = version._version
        elif isinstance(version, tuple):
            self._version = version
        elif isinstance(version, str):
            self._version = self._parse(version)
        else:
            raise ValueError(version)

    @classmethod
    def _parse(cls, version_string: str) -> "_VersionTuple":
        """Split a version string in its components, converting numeric components to int."""
        components = [
            x for x in cls._component_re.split(version_string)
            if x and x != '.'
        ]
        for i, obj in enumerate(components):
            # Alphabetic components can not be converted: keep those as string.
            with contextlib.suppress(ValueError):
                components[i] = int(obj)
        return tuple(components)

    @property
    def parts(self) -> "_VersionTuple":
        """Version components as a tuple"""
        return self._version

    def __repr__(self):
        return '{c}({v!r})'.format(c=type(self).__name__, v=self._version)

    def __str__(self):
        return ".".join(map(str, self._version))

    def __hash__(self):
        # `__eq__` pads the shorter version with `0`/`""` components,
        # so trailing components of that kind must not influence the hash
        # (otherwise "1.2" == "1.2.0" would hold while their hashes differ).
        parts = self._version
        end = len(parts)
        while end > 0 and parts[end - 1] in (0, ""):
            end -= 1
        return hash(parts[:end])

    def to_string(self):
        """Version as string (same as ``str(self)``)."""
        return str(self)

    @staticmethod
    def _pad(
        a: Union[str, "ComparableVersion"], b: Union[str, "ComparableVersion"]
    ) -> Tuple["_VersionTuple", "_VersionTuple"]:
        """Pad version tuples with zero/empty to get same length for intuitive comparison"""
        a = ComparableVersion(a)._version
        b = ComparableVersion(b)._version
        if len(a) > len(b):
            b = b + tuple(0 if isinstance(x, int) else "" for x in a[len(b) :])
        elif len(b) > len(a):
            a = a + tuple(0 if isinstance(x, int) else "" for x in b[len(a) :])
        return a, b

    def __eq__(self, other: Union[str, "ComparableVersion"]) -> bool:
        # Equality checks should not blow up on unrelated objects (e.g. `version == None`):
        # let Python fall back on its default handling, which results in `False`.
        if not isinstance(other, (str, tuple, ComparableVersion)):
            return NotImplemented
        a, b = self._pad(self, other)
        return a == b

    def __ge__(self, other: Union[str, "ComparableVersion"]) -> bool:
        a, b = self._pad(self, other)
        return a >= b

    def __gt__(self, other: Union[str, "ComparableVersion"]) -> bool:
        a, b = self._pad(self, other)
        return a > b

    def __le__(self, other: Union[str, "ComparableVersion"]) -> bool:
        a, b = self._pad(self, other)
        return a <= b

    def __lt__(self, other: Union[str, "ComparableVersion"]) -> bool:
        a, b = self._pad(self, other)
        return a < b

    def equals(self, other: Union[str, "ComparableVersion"]):
        """Self is equal to other."""
        return self == other

    # Right hand side referencing expressions.
    def at_least(self, other: Union[str, "ComparableVersion"]):
        """Self is equal or higher than other."""
        return self >= other

    def above(self, other: Union[str, "ComparableVersion"]):
        """Self is higher than other."""
        return self > other

    def at_most(self, other: Union[str, "ComparableVersion"]):
        """Self is equal or lower than other."""
        return self <= other

    def below(self, other: Union[str, "ComparableVersion"]):
        """Self is lower than other."""
        return self < other

    # Left hand side referencing expressions.
    def or_higher(self, other: Union[str, "ComparableVersion"]):
        """Other is equal or higher than self."""
        return ComparableVersion(other) >= self

    def or_lower(self, other: Union[str, "ComparableVersion"]):
        """Other is equal or lower than self"""
        return ComparableVersion(other) <= self

    def accept_lower(self, other: Union[str, "ComparableVersion"]):
        """Other is lower than self."""
        return ComparableVersion(other) < self

    def accept_higher(self, other: Union[str, "ComparableVersion"]):
        """Other is higher than self."""
        return ComparableVersion(other) > self

    def require_at_least(self, other: Union[str, "ComparableVersion"]):
        """Raise exception if self is not at least other."""
        if not self.at_least(other):
            raise ApiVersionException(
                f"openEO API version should be at least {other!s}, but got {self!s}."
            )
through config files) + +""" + +from __future__ import annotations + +import logging +import os +import platform +from configparser import ConfigParser +from copy import deepcopy +from pathlib import Path +from typing import Any, Iterator, List, Optional, Sequence, Union + +from openeo.util import in_interactive_mode + +_log = logging.getLogger(__name__) + +DEFAULT_APP_NAME = "openeo-python-client" + + +def _get_user_dir( + app_name=DEFAULT_APP_NAME, + xdg_env_var="XDG_CONFIG_HOME", + win_env_var="APPDATA", + fallback="~/.config", + win_fallback="~\\AppData\\Roaming", + macos_fallback="~/Library/Preferences", + auto_create=True, +) -> Path: + """ + Get platform specific config/data/cache folder + """ + # Platform specific root locations (from highest priority to lowest) + env = os.environ + if platform.system() == "Windows": + roots = [env.get(win_env_var), win_fallback, fallback] + elif platform.system() == "Darwin": + roots = [env.get(xdg_env_var), macos_fallback, fallback] + else: + # Assume unix + roots = [env.get(xdg_env_var), fallback] + + # Filter out None's, expand user prefix and append app name + dirs = [Path(r).expanduser() / app_name for r in roots if r] + # Prepend with OPENEO_CONFIG_HOME if set. + if env.get("OPENEO_CONFIG_HOME"): + dirs.insert(0, Path(env.get("OPENEO_CONFIG_HOME"))) + + # Use highest prio dir that already exists. + for p in dirs: + if p.exists() and p.is_dir(): + return p + + # No existing dir: create highest prio one (if possible) + if auto_create: + for p in dirs: + try: + p.mkdir(parents=True) + _log.info("Created user dir for {a!r}: {p}".format(a=app_name, p=p)) + return p + except OSError: + pass + + raise Exception("Failed to find user dir for {a!r}. 
class ClientConfig:
    """
    Configuration of the openEO client: a flat mapping from (normalized) config key to value.

    Keys are case-insensitive and can be given as a single dot-separated string
    (e.g. ``"general.verbose"``) or as a sequence of parts (e.g. ``("general", "verbose")``).
    """

    # TODO: support for loading JSON based config files?

    def __init__(self):
        self._config = {}
        self._sources = []

    @classmethod
    def _key(cls, key: Union[str, Sequence[str]]):
        """Normalize a key: join sequence parts with "." and convert to lower case"""
        flat = key if isinstance(key, str) else ".".join(map(str, key))
        return flat.lower()

    def _set(self, key: Union[str, Sequence[str]], value: Any):
        """Store a config value under the given (normalized) key"""
        normalized = self._key(key)
        self._config[normalized] = value

    def get(self, key: Union[str, Sequence[str]], default=None) -> Any:
        """Look up the setting at the given key, falling back on ``default`` when it is not set"""
        # TODO: option to cast/convert to certain type?
        normalized = self._key(key)
        return self._config.get(normalized, default)

    def load_ini_file(self, path: Union[str, Path]) -> "ClientConfig":
        """Load settings from an INI file and keep track of the files that were actually read."""
        parser = ConfigParser()
        self._sources.extend(parser.read(path))
        return self.load_config_parser(parser)

    def load_config_parser(self, parser: ConfigParser) -> "ClientConfig":
        """Load all ``section``/``option`` pairs of the given parser as ``"section.option"`` settings."""
        self._config.update(
            {
                self._key((section, option)): value
                for section in parser.sections()
                for option, value in parser.items(section=section)
            }
        )
        return self

    def dump(self) -> dict:
        """Deep copy of the settings as a dictionary."""
        return deepcopy(self._config)

    @property
    def sources(self) -> List[str]:
        """Sources (as strings) the config was loaded from."""
        return list(map(str, self._sources))

    def __repr__(self):
        return f"<{type(self).__name__} from {self.sources}>"
def get_temporal_extent(
    *args,
    start_date: Union[str, dt.date, None, Any] = None,
    end_date: Union[str, dt.date, None, Any] = None,
    extent: Union[list, tuple, str, None] = None,
    convertor=rfc3339.normalize,
) -> Tuple[Union[str, None], Union[str, None]]:
    """
    Helper to derive a date extent from various call forms:

        >>> get_temporal_extent("2019-01-01")
        ("2019-01-01", None)
        >>> get_temporal_extent("2019-01-01", "2019-05-15")
        ("2019-01-01", "2019-05-15")
        >>> get_temporal_extent(["2019-01-01", "2019-05-15"])
        ("2019-01-01", "2019-05-15")
        >>> get_temporal_extent(start_date="2019-01-01", end_date="2019-05-15")
        ("2019-01-01", "2019-05-15")
        >>> get_temporal_extent(extent=["2019-01-01", "2019-05-15"])
        ("2019-01-01", "2019-05-15")

    It also supports resolving year/month shorthand notation (rounding down to first day of year or month):

        >>> get_temporal_extent("2019")
        ("2019-01-01", None)
        >>> get_temporal_extent(start_date="2019-02", end_date="2019-03")
        ("2019-02-01", "2019-03-01")

    And even interprets extents given as a single string:

        >>> get_temporal_extent(extent="2021")
        ("2021-01-01", "2022-01-01")

    :param args: start and end date as one or two positional arguments,
        or a single list/tuple of at most two items.
    :param start_date: start of the extent (mutually exclusive with ``args`` and ``extent``).
    :param end_date: end of the extent (mutually exclusive with ``args`` and ``extent``).
    :param extent: two-element list/tuple, or a single year/month/day string
        covering that whole period (mutually exclusive with ``args`` and ``start_date``/``end_date``).
    :param convertor: callable applied to the (non-empty) start and end values to produce the result.
    :return: tuple ``(start, end)``, where empty values are returned as ``None``.
    :raises ValueError: when the arguments are conflicting or can not be interpreted as a temporal extent.
    """
    if (bool(len(args) > 0) + bool(start_date or end_date) + bool(extent)) > 1:
        raise ValueError("At most one of `*args`, `start_date/end_date`, or `extent` should be provided")
    if args:
        # Convert positional `*args` to `start_date`/`end_date` argument
        if len(args) == 2:
            start_date, end_date = args
        elif len(args) == 1:
            arg = args[0]
            if isinstance(arg, (list, tuple)):
                # Check the length of the given list/tuple itself
                # (`args` is known to have exactly one item in this branch).
                if len(arg) > 2:
                    raise ValueError(f"Unable to handle {args} as a temporal extent")
                # Pad with None to get a (start, end) pair.
                start_date, end_date = tuple(arg) + (None,) * (2 - len(arg))
            else:
                start_date, end_date = arg, None
        else:
            raise ValueError(f"Unable to handle {args} as a temporal extent")
    elif extent:
        if isinstance(extent, (list, tuple)) and len(extent) == 2:
            start_date, end_date = extent
        elif isinstance(extent, str):
            # Special case: extent is given as a single string (e.g. "2021" for full year extent
            # or "2021-04" for full month extent): convert that to the appropriate extent tuple.
            start_date, end_date = _convert_abbreviated_date(extent), _get_end_of_time_slot(extent)
        else:
            raise ValueError(f"Unable to handle {extent} as a temporal extent")
    start_date = _convert_abbreviated_date(start_date)
    end_date = _convert_abbreviated_date(end_date)
    return convertor(start_date) if start_date else None, convertor(end_date) if end_date else None


class _TypeOfDateString(Enum):
    """Enum that denotes which kind of date a string represents.

    This is an internal helper class, not intended to be public.
    """

    INVALID = 0  # It was neither of the options below
    YEAR = 1
    MONTH = 2
    DAY = 3
    DATETIME = 4


_REGEX_DAY = re.compile(r"^(\d{4})[:/_-](\d{2})[:/_-](\d{2})$")
_REGEX_MONTH = re.compile(r"^(\d{4})[:/_-](\d{2})$")
_REGEX_YEAR = re.compile(r"^\d{4}$")


def _get_end_of_time_slot(date: str) -> Union[dt.date, str]:
    """
    Calculate the end of a left-closed period: the first day after a year, month or day.

    Non-string values and datetime strings are returned as-is.
    """
    if not isinstance(date, str):
        return date

    date_converted = _convert_abbreviated_date(date)
    granularity = _type_of_date_string(date)
    if granularity == _TypeOfDateString.YEAR:
        return dt.date(date_converted.year + 1, 1, 1)
    elif granularity == _TypeOfDateString.MONTH:
        if date_converted.month == 12:
            # Month after December: January of next year.
            return dt.date(date_converted.year + 1, 1, 1)
        else:
            return dt.date(date_converted.year, date_converted.month + 1, 1)
    elif granularity == _TypeOfDateString.DAY:
        # TODO: also support day granularity in _convert_abbreviated_date so that we don't need ad-hoc parsing here
        return dt.date(*(int(x) for x in _REGEX_DAY.match(date).group(1, 2, 3))) + dt.timedelta(days=1)
    else:
        # Don't convert: it is a datetime.
        return date


def _convert_abbreviated_date(
    date: Union[str, dt.date, dt.datetime, Any],
) -> Union[str, dt.date, dt.datetime, Any]:
    """
    Helper function to convert year- or month-abbreviated strings (e.g. "2021" or "2021-03") into a date
    (first day of the corresponding period). Other values are returned as original.

    :param date: some kind of date representation:

        - A string, formatted "yyyy", "yyyy-mm", "yyyy-mm-dd" or with even more granularity
        - Any other type (e.g. ``datetime.date``, ``datetime.datetime``, a parameter, ...)

    :return:
        If input was a string representing a year or a month:
        a ``datetime.date`` that represents the first day of that year or month.
        Otherwise, the original version is returned as-is.

    :raises ValueError:
        when ``date`` was a string but not recognized as a date representation

    Examples
    --------

    >>> # For year and month: "round down" to first day:
    >>> _convert_abbreviated_date("2021")
    datetime.date(2021, 1, 1)
    >>> _convert_abbreviated_date("2022-08")
    datetime.date(2022, 8, 1)

    >>> # Preserve other values
    >>> _convert_abbreviated_date("2022-08-15")
    '2022-08-15'
    """
    if not isinstance(date, str):
        return date

    # TODO: avoid double regex matching? Once in _type_of_date_string and once here.
    type_of_date = _type_of_date_string(date)
    if type_of_date == _TypeOfDateString.INVALID:
        raise ValueError(
            f"The value of date='{date}' does not represent any of: "
            + "a year ('yyyy'), a year + month ('yyyy-mm'), a date, or a datetime."
        )

    if type_of_date in [_TypeOfDateString.DATETIME, _TypeOfDateString.DAY]:
        # TODO: also convert these to `date` or `datetime` for more internal consistency.
        return date

    if type_of_date == _TypeOfDateString.MONTH:
        match_month = _REGEX_MONTH.match(date)
        year = int(match_month.group(1))
        month = int(match_month.group(2))
    else:
        # Only remaining option: a year string.
        year = int(date)
        month = 1

    return dt.date(year, month, 1)


def _type_of_date_string(date: str) -> _TypeOfDateString:
    """
    Returns which type of date the string represents: year, month, day or datetime.

    :raises TypeError: when ``date`` is not a string.
    """

    if not isinstance(date, str):
        raise TypeError("date must be a string")

    try:
        rfc3339.parse_datetime(date)
        return _TypeOfDateString.DATETIME
    except ValueError:
        pass

    # Using a separate and stricter regular expressions to detect day, month,
    # or year. Having a regex that only matches one type of period makes it
    # easier to check it is effectively only a year, or only a month,
    # but not a day. Datetime strings are more complex so we use rfc3339 to
    # check whether or not it represents a datetime.
    match_day = _REGEX_DAY.match(date)
    match_month = _REGEX_MONTH.match(date)
    match_year = _REGEX_YEAR.match(date)

    if match_day:
        return _TypeOfDateString.DAY
    if match_month:
        return _TypeOfDateString.MONTH
    if match_year:
        return _TypeOfDateString.YEAR

    return _TypeOfDateString.INVALID
class JobDatabaseInterface(metaclass=abc.ABCMeta):
    """
    Interface for a database of job metadata to use with the :py:class:`MultiBackendJobManager`,
    allowing to regularly persist the job metadata while polling the job statuses
    and resume/restart the job tracking after it was interrupted.

    All methods listed here are abstract:
    a concrete job database has to implement each of them before it can be instantiated.

    .. versionadded:: 0.31.0
    """

    @abc.abstractmethod
    def exists(self) -> bool:
        """Does the job database already exist, to read job data from?"""
        ...

    @abc.abstractmethod
    def persist(self, df: pd.DataFrame):
        """
        Store job data to the database.
        The provided dataframe may contain partial information, which is merged into the larger database.

        :param df: job data to store.
        """
        ...

    @abc.abstractmethod
    def count_by_status(self, statuses: Iterable[str] = ()) -> dict:
        """
        Retrieve the number of jobs per status.

        :param statuses: List/set of statuses to include. If empty (the default), all statuses are included.

        :return: dictionary with status as key and the count as value.
        """
        ...

    @abc.abstractmethod
    def get_by_status(self, statuses: List[str], max=None) -> pd.DataFrame:
        """
        Returns a dataframe with jobs, filtered by status.

        :param statuses: List of statuses to include.
        :param max: Maximum number of jobs to return
            (default ``None``; presumably meaning "no limit", to be confirmed against the implementations).

        :return: DataFrame with jobs filtered by status.
        """
        ...
    def __init__(
        self,
        poll_sleep: int = 60,
        root_dir: Optional[Union[str, Path]] = ".",
        *,
        cancel_running_job_after: Optional[int] = None,
    ):
        """
        Create a MultiBackendJobManager.

        :param poll_sleep: stored as ``self.poll_sleep``
            (documented on the class as the number of seconds to sleep between polls).
        :param root_dir: root directory for job related files.
            An explicit ``None`` or empty string is handled like the default ``"."``.
        :param cancel_running_job_after: optional number of seconds,
            stored as a ``datetime.timedelta`` (or ``None`` when not set).
        """
        self._stop_thread = None
        # Registered backends (filled through `add_backend`), by backend name.
        self.backends: Dict[str, _Backend] = {}
        self.poll_sleep = poll_sleep
        # Cache of already created connections (filled by `_get_connection`), by backend name.
        self._connections: Dict[str, Connection] = {}

        # An explicit None or "" should also default to "."
        self._root_dir = Path(root_dir or ".")

        # Convert the limit in seconds to a timedelta (explicit `is not None` check, so that 0 is preserved).
        self._cancel_running_job_after = (
            datetime.timedelta(seconds=cancel_running_job_after) if cancel_running_job_after is not None else None
        )
        self._thread = None
@staticmethod
def _make_resilient(connection):
    """
    Mount HTTP adapters on the connection's session that retry failed requests.

    Requests are retried up to ``MAX_RETRIES`` times (backoff factor 0.1)
    for the following HTTP 50x statuses:

    - 500 Internal Server Error
    - 502 Bad Gateway
    - 503 Service Unavailable
    - 504 Gateway Timeout

    Retrying applies to ``HEAD``, ``GET``, ``OPTIONS`` and also ``POST`` requests.

    :param connection: connection whose ``session`` is modified in place.
    """
    # TODO: refactor this helper out of this class and unify with `openeo_driver.util.http.requests_with_retry`
    status_forcelist = [500, 502, 503, 504]
    retries = Retry(
        total=MAX_RETRIES,
        read=MAX_RETRIES,
        other=MAX_RETRIES,
        status=MAX_RETRIES,
        backoff_factor=0.1,
        status_forcelist=status_forcelist,
        allowed_methods=["HEAD", "GET", "OPTIONS", "POST"],
    )
    # Each URL scheme gets its own adapter instance, sharing the same retry policy.
    for prefix in ("https://", "http://"):
        connection.session.mount(prefix, HTTPAdapter(max_retries=retries))
+ + The following parameters will be passed to ``start_job``: + + ``row`` (:py:class:`pandas.Series`): + The row in the pandas dataframe that stores the jobs state and other tracked data. + + ``connection_provider``: + A getter to get a connection by backend name. + Typically, you would need either the parameter ``connection_provider``, + or the parameter ``connection``, but likely you will not need both. + + ``connection`` (:py:class:`Connection`): + The :py:class:`Connection` itself, that has already been created. + Typically, you would need either the parameter ``connection_provider``, + or the parameter ``connection``, but likely you will not need both. + + ``provider`` (``str``): + The name of the backend that will run the job. + + You do not have to define all the parameters described below, but if you leave + any of them out, then remember to include the ``*args`` and ``**kwargs`` parameters. + Otherwise you will have an exception because :py:meth:`run_jobs` passes unknown parameters to ``start_job``. + :param job_db: + Job database to load/store existing job status data and other metadata from/to. + Can be specified as a path to CSV or Parquet file, + or as a custom database object following the :py:class:`JobDatabaseInterface` interface. + + .. note:: + Support for Parquet files depends on the ``pyarrow`` package + as :ref:`optional dependency `. + + .. versionadded:: 0.32.0 + """ + + # Resume from existing db + _log.info(f"Resuming `run_jobs` from existing {job_db}") + + self._stop_thread = False + + def run_loop(): + + # TODO: support user-provided `stats` + stats = collections.defaultdict(int) + + while ( + sum(job_db.count_by_status(statuses=["not_started", "created", "queued", "running"]).values()) > 0 + and not self._stop_thread + ): + self._job_update_loop(job_db=job_db, start_job=start_job) + stats["run_jobs loop"] += 1 + + _log.info(f"Job status histogram: {job_db.count_by_status()}. 
def stop_job_thread(self, timeout_seconds: Optional[float] = _UNSET):
    """
    Signal the job polling thread to stop and wait for it to finish.

    :param timeout_seconds: How long to wait for the thread to stop.
        When not specified, 2 times the ``poll_sleep`` time is used.
        Use ``None`` to wait indefinitely.

    .. versionadded:: 0.32.0
    """
    worker = self._thread
    if worker is None:
        _log.error("No job thread to stop")
        return

    # Raise the flag that the polling loop checks between its sleeps.
    self._stop_thread = True
    join_timeout = 2 * self.poll_sleep if timeout_seconds is _UNSET else timeout_seconds
    worker.join(join_timeout)
    if worker.is_alive():
        _log.warning("Job thread did not stop after timeout")
+ + ``connection`` (:py:class:`Connection`): + The :py:class:`Connection` itself, that has already been created. + Typically, you would need either the parameter ``connection_provider``, + or the parameter ``connection``, but likely you will not need both. + + ``provider`` (``str``): + The name of the backend that will run the job. + + You do not have to define all the parameters described below, but if you leave + any of them out, then remember to include the ``*args`` and ``**kwargs`` parameters. + Otherwise you will have an exception because :py:meth:`run_jobs` passes unknown parameters to ``start_job``. + + :param job_db: + Job database to load/store existing job status data and other metadata from/to. + Can be specified as a path to CSV or Parquet file, + or as a custom database object following the :py:class:`JobDatabaseInterface` interface. + + .. note:: + Support for Parquet files depends on the ``pyarrow`` package + as :ref:`optional dependency `. + + :return: dictionary with stats collected during the job running loop. + Note that the set of fields in this dictionary is experimental + and subject to change + + .. versionchanged:: 0.31.0 + Added support for persisting the job metadata in Parquet format. + + .. versionchanged:: 0.31.0 + Replace ``output_file`` argument with ``job_db`` argument, + which can be a path to a CSV or Parquet file, + or a user-defined :py:class:`JobDatabaseInterface` object. + The deprecated ``output_file`` argument is still supported for now. + + .. versionchanged:: 0.33.0 + return a stats dictionary + """ + # TODO Defining start_jobs as a Protocol might make its usage more clear, and avoid complicated docstrings, + + # Backwards compatibility for deprecated `output_file` argument + if "output_file" in kwargs: + if job_db is not None: + raise ValueError("Only one of `output_file` and `job_db` should be provided") + warnings.warn( + "The `output_file` argument is deprecated. 
def _job_update_loop(
    self, job_db: JobDatabaseInterface, start_job: Callable[[], BatchJob], stats: Optional[dict] = None
):
    """
    Inner loop logic of job management:
    go through the necessary jobs to check for status updates,
    trigger status events, start new jobs when there is room for them, etc.

    :param job_db: job database to read job state from and persist updates to.
    :param start_job: callable that creates (and optionally starts) a job for a dataframe row.
    :param stats: optional counter dictionary that is updated in place.
    :raises RuntimeError: when no backends are registered.
    """
    if not self.backends:
        raise RuntimeError("No backends registered")

    stats = stats if stats is not None else collections.defaultdict(int)

    with ignore_connection_errors(context="get statuses"):
        self._track_statuses(job_db, stats=stats)
        stats["track_statuses"] += 1

    # Work on a copy: `_launch_job` updates the rows in place.
    not_started = job_db.get_by_status(statuses=["not_started"], max=200).copy()
    if len(not_started) > 0:
        # Check number of jobs running at each backend
        running = job_db.get_by_status(statuses=["created", "queued", "running"])
        stats["job_db get_by_status"] += 1
        per_backend = running.groupby("backend_name").size().to_dict()
        _log.info(f"Running per backend: {per_backend}")
        total_added = 0
        for backend_name, backend in self.backends.items():
            backend_load = per_backend.get(backend_name, 0)
            if backend_load < backend.parallel_jobs:
                to_add = backend.parallel_jobs - backend_load
                for i in not_started.index[total_added : total_added + to_add]:
                    self._launch_job(start_job, df=not_started, i=i, backend_name=backend_name, stats=stats)
                    stats["job launch"] += 1

                    # Persist exactly the row that was just launched.
                    # Selecting with a list of labels avoids the inclusive label slice
                    # `i : i + 1`, which also covers the next row and requires
                    # integer, sorted index labels.
                    job_db.persist(not_started.loc[[i]])
                    stats["job_db persist"] += 1
                    total_added += 1
+ + :param i: + index of the job's row in dataframe df + + :param backend_name: + name of the backend that will execute the job. + """ + stats = stats if stats is not None else collections.defaultdict(int) + + df.loc[i, "backend_name"] = backend_name + row = df.loc[i] + try: + _log.info(f"Starting job on backend {backend_name} for {row.to_dict()}") + connection = self._get_connection(backend_name, resilient=True) + + stats["start_job call"] += 1 + job = start_job( + row=row, + connection_provider=self._get_connection, + connection=connection, + provider=backend_name, + ) + except requests.exceptions.ConnectionError as e: + _log.warning(f"Failed to start job for {row.to_dict()}", exc_info=True) + df.loc[i, "status"] = "start_failed" + stats["start_job error"] += 1 + else: + df.loc[i, "start_time"] = rfc3339.utcnow() + if job: + df.loc[i, "id"] = job.job_id + with ignore_connection_errors(context="get status"): + status = job.status() + stats["job get status"] += 1 + df.loc[i, "status"] = status + if status == "created": + # start job if not yet done by callback + try: + job.start() + stats["job start"] += 1 + df.loc[i, "status"] = job.status() + stats["job get status"] += 1 + except OpenEoApiError as e: + _log.error(e) + df.loc[i, "status"] = "start_failed" + stats["job start error"] += 1 + else: + # TODO: what is this "skipping" about actually? + df.loc[i, "status"] = "skipped" + stats["start_job skipped"] += 1 + + def on_job_done(self, job: BatchJob, row): + """ + Handles jobs that have finished. Can be overridden to provide custom behaviour. + + Default implementation downloads the results into a folder containing the title. + + :param job: The job that has finished. + :param row: DataFrame row containing the job's metadata. + """ + # TODO: param `row` is never accessed in this method. Remove it? Is this intended for future use? 
def on_job_error(self, job: BatchJob, row):
    """
    Hook for jobs that ended with an error. Override it for custom behaviour.

    By default, the job's error-level logs are dumped to a JSON file
    in the job's folder (nothing is written when there are no such logs).

    :param job: The job that has finished.
    :param row: DataFrame row containing the job's metadata.
    """
    # TODO: param `row` is never accessed in this method. Remove it? Is this intended for future use?
    logs = job.logs(level="error")
    target_path = self.get_error_log_path(job.job_id)

    if len(logs) == 0:
        return

    self.ensure_job_dir_exists(job.job_id)
    serialized = json.dumps(logs, indent=2)
    target_path.write_text(serialized)
def ensure_job_dir_exists(self, job_id: str) -> Path:
    """
    Create the job folder (including missing parents) if it does not exist yet.

    :param job_id: ID of the job to create the folder for.
    :return: path of the job folder.
    :raises FileExistsError: if the path exists but is not a directory.
    """
    job_dir = self.get_job_dir(job_id)
    # `exist_ok=True` keeps this idempotent without a separate
    # check-then-create step that could race with another creator.
    job_dir.mkdir(parents=True, exist_ok=True)
    return job_dir
+ """ + stats = stats if stats is not None else collections.defaultdict(int) + + active = job_db.get_by_status(statuses=["created", "queued", "running"]).copy() + for i in active.index: + job_id = active.loc[i, "id"] + backend_name = active.loc[i, "backend_name"] + previous_status = active.loc[i, "status"] + + try: + con = self._get_connection(backend_name) + the_job = con.job(job_id) + job_metadata = the_job.describe() + stats["job describe"] += 1 + new_status = job_metadata["status"] + + _log.info( + f"Status of job {job_id!r} (on backend {backend_name}) is {new_status!r} (previously {previous_status!r})" + ) + + if new_status == "finished": + stats["job finished"] += 1 + self.on_job_done(the_job, active.loc[i]) + + if previous_status != "error" and new_status == "error": + stats["job failed"] += 1 + self.on_job_error(the_job, active.loc[i]) + + if previous_status in {"created", "queued"} and new_status == "running": + stats["job started running"] += 1 + active.loc[i, "running_start_time"] = rfc3339.utcnow() + + if new_status == "canceled": + stats["job canceled"] += 1 + self.on_job_cancel(the_job, active.loc[i]) + + if self._cancel_running_job_after and new_status == "running": + self._cancel_prolonged_job(the_job, active.loc[i]) + + active.loc[i, "status"] = new_status + + # TODO: there is well hidden coupling here with "cpu", "memory" and "duration" from `_normalize_df` + for key in job_metadata.get("usage", {}).keys(): + if key in active.columns: + active.loc[i, key] = _format_usage_stat(job_metadata, key) + if "costs" in job_metadata.keys(): + active.loc[i, "costs"] = job_metadata.get("costs") + + except OpenEoApiError as e: + # TODO: inspect status code and e.g. 
def _format_usage_stat(job_metadata: dict, field: str) -> str:
    """
    Render the "usage" entry `field` of the job metadata as "<value> <unit>".

    A missing value falls back to 0, a missing unit to an empty string;
    surrounding whitespace is stripped from the result.
    """
    value, unit = (
        deep_get(job_metadata, "usage", field, key, default=fallback)
        for (key, fallback) in (("value", 0), ("unit", ""))
    )
    return "{} {}".format(value, unit).strip()
def count_by_status(self, statuses: Iterable[str] = ()) -> dict:
    """
    Build a histogram of the number of jobs per status.

    :param statuses: statuses to restrict the histogram to.
        When empty (the default), all statuses are included.
    :return: dictionary mapping status to number of jobs.
    """
    histogram = self.df.groupby("status").size().to_dict()
    wanted = set(statuses)
    if not wanted:
        return histogram
    return {status: count for (status, count) in histogram.items() if status in wanted}
def read(self) -> pd.DataFrame:
    """
    Read the job data from the CSV file at ``self.path``.

    Columns listed in ``MultiBackendJobManager._COLUMN_REQUIREMENTS`` are
    loaded with their declared dtype. If there is a "geometry" column whose
    first value is valid WKT, the result is converted to a GeoPandas dataframe.

    :return: loaded job data.
    """
    df = pd.read_csv(
        self.path,
        # TODO: possible to avoid hidden coupling with MultiBackendJobManager here?
        dtype={c: r.dtype for (c, r) in MultiBackendJobManager._COLUMN_REQUIREMENTS.items()},
    )
    # Only peek at the first geometry value when there is one
    # (`.iloc[0]` raises IndexError on an empty table).
    if "geometry" in df.columns and df["geometry"].dtype.name != "geometry" and len(df) > 0:
        first_value = df["geometry"].iloc[0]
        # An empty CSV cell is loaded as NaN (float), which is not WKT.
        if isinstance(first_value, str) and self._is_valid_wkt(first_value):
            import geopandas

            # `df.to_csv()` in `persist()` has encoded geometries as WKT, so we decode that here.
            df = geopandas.GeoDataFrame(df, geometry=geopandas.GeoSeries.from_wkt(df["geometry"]))
    return df
def read(self) -> pd.DataFrame:
    """
    Read the job data from the Parquet file at ``self.path``.

    :return: loaded job data: a GeoPandas dataframe when the file has "geo"
        metadata, a plain pandas dataframe otherwise.
    """
    # Unfortunately, a naive `pandas.read_parquet()` does not easily allow
    # reconstructing geometries from a GeoPandas Parquet file.
    # And vice-versa, `geopandas.read_parquet()` does not support reading
    # Parquet file without geometries.
    # So we have to guess which case we have.
    # TODO is there a cleaner way to do this?
    import pyarrow.parquet

    file_metadata = pyarrow.parquet.read_metadata(self.path)
    # The key-value metadata can be None (file written without any),
    # in which case there is certainly no "geo" entry.
    key_value_metadata = file_metadata.metadata or {}
    if b"geo" in key_value_metadata:
        import geopandas

        return geopandas.read_parquet(self.path)
    return pd.read_parquet(self.path)
+ :param on_exists: What to do when the job database already exists: + - "error": (default) raise an exception + - "skip": work with existing database, ignore given dataframe and skip any initialization + + .. versionadded:: 0.33.0 + """ + job_db = get_job_db(path) + if isinstance(job_db, FullDataFrameJobDatabase): + job_db.initialize_from_df(df=df, on_exists=on_exists) + else: + raise NotImplementedError(f"Initialization of {type(job_db)} is not supported.") + return job_db + + +class ProcessBasedJobCreator: + """ + Batch job creator + (to be used together with :py:class:`MultiBackendJobManager`) + that takes a parameterized openEO process definition + (e.g a user-defined process (UDP) or a remote openEO process definition), + and creates a batch job + for each row of the dataframe managed by the :py:class:`MultiBackendJobManager` + by filling in the process parameters with corresponding row values. + + .. seealso:: + See :ref:`job-management-with-process-based-job-creator` + for more information and examples. + + Process parameters are linked to dataframe columns by name. + While this intuitive name-based matching should cover most use cases, + there are additional options for overrides or fallbacks: + + - When provided, ``parameter_column_map`` will be consulted + for resolving a process parameter name (key in the dictionary) + to a desired dataframe column name (corresponding value). + - One common case is handled automatically as convenience functionality. + + When: + + - ``parameter_column_map`` is not provided (or set to ``None``), + - and there is a *single parameter* that accepts inline GeoJSON geometries, + - and the dataframe is a GeoPandas dataframe with a *single geometry* column, + + then this parameter and this geometries column will be linked automatically. 
+ + - If a parameter can not be matched with a column by name as described above, + a default value will be picked, + first by looking in ``parameter_defaults`` (if provided), + and then by looking up the default value from the parameter schema in the process definition. + - Finally if no (default) value can be determined and the parameter + is not flagged as optional, an error will be raised. + + + :param process_id: (optional) openEO process identifier. + Can be omitted when working with a remote process definition + that is fully defined with a URL in the ``namespace`` parameter. + :param namespace: (optional) openEO process namespace. + Typically used to provide a URL to a remote process definition. + :param parameter_defaults: (optional) default values for process parameters, + to be used when not available in the dataframe managed by + :py:class:`MultiBackendJobManager`. + :param parameter_column_map: Optional overrides + for linking process parameters to dataframe columns: + mapping of process parameter names as key + to dataframe column names as value. + + .. versionadded:: 0.33.0 + + .. warning:: + This is an experimental API subject to change, + and we greatly welcome + `feedback and suggestions for improvement `_. 
+ + """ + + def __init__( + self, + *, + process_id: Optional[str] = None, + namespace: Union[str, None] = None, + parameter_defaults: Optional[dict] = None, + parameter_column_map: Optional[dict] = None, + ): + if process_id is None and namespace is None: + raise ValueError("At least one of `process_id` and `namespace` should be provided.") + self._process_id = process_id + self._namespace = namespace + self._parameter_defaults = parameter_defaults or {} + self._parameter_column_map = parameter_column_map + self._cache = LazyLoadCache() + + def _get_process_definition(self, connection: Connection) -> Process: + if isinstance(self._namespace, str) and re.match("https?://", self._namespace): + # Remote process definition handling + return self._cache.get( + key=("remote_process_definition", self._namespace, self._process_id), + load=lambda: parse_remote_process_definition(namespace=self._namespace, process_id=self._process_id), + ) + elif self._namespace is None: + # Handling of a user-specific UDP + udp_raw = connection.user_defined_process(self._process_id).describe() + return Process.from_dict(udp_raw) + else: + raise NotImplementedError( + f"Unsupported process definition source udp_id={self._process_id!r} namespace={self._namespace!r}" + ) + + def start_job(self, row: pd.Series, connection: Connection, **_) -> BatchJob: + """ + Implementation of the ``start_job`` callable interface + of :py:meth:`MultiBackendJobManager.run_jobs` + to create a job based on given dataframe row + + :param row: The row in the pandas dataframe that stores the jobs state and other tracked data. + :param connection: The connection to the backend. 
+ """ + # TODO: refactor out some methods, for better reuse and decoupling: + # `get_arguments()` (to build the arguments dictionary), `get_cube()` (to create the cube), + + process_definition = self._get_process_definition(connection=connection) + process_id = process_definition.id + parameters = process_definition.parameters or [] + + if self._parameter_column_map is None: + self._parameter_column_map = self._guess_parameter_column_map(parameters=parameters, row=row) + + arguments = {} + for parameter in parameters: + param_name = parameter.name + column_name = self._parameter_column_map.get(param_name, param_name) + if column_name in row.index: + # Get value from dataframe row + value = row.loc[column_name] + elif param_name in self._parameter_defaults: + # Fallback on default values from constructor + value = self._parameter_defaults[param_name] + elif parameter.has_default(): + # Explicitly use default value from parameter schema + value = parameter.default + elif parameter.optional: + # Skip optional parameters without any fallback default value + continue + else: + raise ValueError(f"Missing required parameter {param_name !r} for process {process_id!r}") + + # Prepare some values/dtypes for JSON encoding + if isinstance(value, numpy.integer): + value = int(value) + elif isinstance(value, numpy.number): + value = float(value) + elif isinstance(value, shapely.geometry.base.BaseGeometry): + value = shapely.geometry.mapping(value) + + arguments[param_name] = value + + cube = connection.datacube_from_process(process_id=process_id, namespace=self._namespace, **arguments) + + title = row.get("title", f"Process {process_id!r} with {repr_truncate(arguments)}") + description = row.get("description", f"Process {process_id!r} (namespace {self._namespace}) with {arguments}") + job = connection.create_job(cube, title=title, description=description) + + return job + + def __call__(self, *arg, **kwargs) -> BatchJob: + """Syntactic sugar for calling :py:meth:`start_job`.""" 
+ return self.start_job(*arg, **kwargs) + + @staticmethod + def _guess_parameter_column_map(parameters: List[Parameter], row: pd.Series) -> dict: + """ + Guess parameter-column mapping from given parameter list and dataframe row + """ + parameter_column_map = {} + # Geometry based mapping: try to automatically map geometry columns to geojson parameters + geojson_parameters = [p.name for p in parameters if p.schema.accepts_geojson()] + geometry_columns = [i for (i, v) in row.items() if isinstance(v, shapely.geometry.base.BaseGeometry)] + if geojson_parameters and geometry_columns: + if len(geojson_parameters) == 1 and len(geometry_columns) == 1: + # Most common case: one geometry parameter and one geometry column: can be mapped naively + parameter_column_map[geojson_parameters[0]] = geometry_columns[0] + elif all(p in geometry_columns for p in geojson_parameters): + # Each geometry param has geometry column with same name: easy to map + parameter_column_map.update((p, p) for p in geojson_parameters) + else: + raise RuntimeError( + f"Problem with mapping geometry columns ({geometry_columns}) to process parameters ({geojson_parameters})" + ) + _log.debug(f"Guessed parameter-column map: {parameter_column_map}") + return parameter_column_map diff --git a/lib/openeo/extra/spectral_indices/__init__.py b/lib/openeo/extra/spectral_indices/__init__.py new file mode 100644 index 000000000..d83c37813 --- /dev/null +++ b/lib/openeo/extra/spectral_indices/__init__.py @@ -0,0 +1,2 @@ + +from openeo.extra.spectral_indices.spectral_indices import * diff --git a/lib/openeo/extra/spectral_indices/resources/awesome-spectral-indices/LICENSE b/lib/openeo/extra/spectral_indices/resources/awesome-spectral-indices/LICENSE new file mode 100644 index 000000000..7bd30da58 --- /dev/null +++ b/lib/openeo/extra/spectral_indices/resources/awesome-spectral-indices/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 David Montero Loaiza + +Permission is hereby granted, free of charge, to 
any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/lib/openeo/extra/spectral_indices/resources/awesome-spectral-indices/bands.json b/lib/openeo/extra/spectral_indices/resources/awesome-spectral-indices/bands.json new file mode 100644 index 000000000..052f82015 --- /dev/null +++ b/lib/openeo/extra/spectral_indices/resources/awesome-spectral-indices/bands.json @@ -0,0 +1,785 @@ +{ + "A": { + "common_name": "coastal", + "long_name": "Aersols", + "max_wavelength": 455, + "min_wavelength": 400, + "platforms": { + "landsat8": { + "band": "B1", + "bandwidth": 20.0, + "name": "Coastal Aerosol", + "platform": "Landsat 8", + "wavelength": 440.0 + }, + "landsat9": { + "band": "B1", + "bandwidth": 20.0, + "name": "Coastal Aerosol", + "platform": "Landsat 8", + "wavelength": 440.0 + }, + "planetscope": { + "band": "B1", + "bandwidth": 21.0, + "name": "Coastal Blue", + "platform": "PlanetScope", + "wavelength": 441.5 + }, + "sentinel2a": { + "band": "B1", + "bandwidth": 21, + "name": "Aerosols", + "platform": "Sentinel-2A", + "wavelength": 442.7 
+ }, + "sentinel2b": { + "band": "B1", + "bandwidth": 21, + "name": "Aerosols", + "platform": "Sentinel-2B", + "wavelength": 442.3 + }, + "wv2": { + "band": "B1", + "bandwidth": 50.0, + "name": "Coastal Blue", + "platform": "WorldView-2", + "wavelength": 425.0 + }, + "wv3": { + "band": "B1", + "bandwidth": 50.0, + "name": "Coastal Blue", + "platform": "WorldView-3", + "wavelength": 425.0 + } + }, + "short_name": "A" + }, + "B": { + "common_name": "blue", + "long_name": "Blue", + "max_wavelength": 530, + "min_wavelength": 450, + "platforms": { + "landsat4": { + "band": "B1", + "bandwidth": 70.0, + "name": "Blue", + "platform": "Landsat 4", + "wavelength": 485.0 + }, + "landsat5": { + "band": "B1", + "bandwidth": 70.0, + "name": "Blue", + "platform": "Landsat 5", + "wavelength": 485.0 + }, + "landsat7": { + "band": "B1", + "bandwidth": 70.0, + "name": "Blue", + "platform": "Landsat 7", + "wavelength": 485.0 + }, + "landsat8": { + "band": "B2", + "bandwidth": 60.0, + "name": "Blue", + "platform": "Landsat 8", + "wavelength": 480.0 + }, + "landsat9": { + "band": "B2", + "bandwidth": 60.0, + "name": "Blue", + "platform": "Landsat 9", + "wavelength": 480.0 + }, + "modis": { + "band": "B3", + "bandwidth": 20.0, + "name": "Blue", + "platform": "Terra/Aqua: MODIS", + "wavelength": 469.0 + }, + "planetscope": { + "band": "B2", + "bandwidth": 50.0, + "name": "Blue", + "platform": "PlanetScope", + "wavelength": 490.0 + }, + "sentinel2a": { + "band": "B2", + "bandwidth": 66.0, + "name": "Blue", + "platform": "Sentinel-2A", + "wavelength": 492.4 + }, + "sentinel2b": { + "band": "B2", + "bandwidth": 66.0, + "name": "Blue", + "platform": "Sentinel-2B", + "wavelength": 492.1 + }, + "wv2": { + "band": "B2", + "bandwidth": 60.0, + "name": "Blue", + "platform": "WorldView-2", + "wavelength": 480.0 + }, + "wv3": { + "band": "B2", + "bandwidth": 60.0, + "name": "Blue", + "platform": "WorldView-3", + "wavelength": 480.0 + } + }, + "short_name": "B" + }, + "G": { + "common_name": "green", 
+ "long_name": "Green", + "max_wavelength": 600, + "min_wavelength": 510, + "platforms": { + "landsat4": { + "band": "B2", + "bandwidth": 80.0, + "name": "Green", + "platform": "Landsat 4", + "wavelength": 560.0 + }, + "landsat5": { + "band": "B2", + "bandwidth": 80.0, + "name": "Green", + "platform": "Landsat 5", + "wavelength": 560.0 + }, + "landsat7": { + "band": "B2", + "bandwidth": 80.0, + "name": "Green", + "platform": "Landsat 7", + "wavelength": 560.0 + }, + "landsat8": { + "band": "B3", + "bandwidth": 60.0, + "name": "Green", + "platform": "Landsat 8", + "wavelength": 560.0 + }, + "landsat9": { + "band": "B3", + "bandwidth": 60.0, + "name": "Green", + "platform": "Landsat 9", + "wavelength": 560.0 + }, + "modis": { + "band": "B4", + "bandwidth": 20.0, + "name": "Green", + "platform": "Terra/Aqua: MODIS", + "wavelength": 555.0 + }, + "planetscope": { + "band": "B4", + "bandwidth": 36.0, + "name": "Green", + "platform": "PlanetScope", + "wavelength": 565.0 + }, + "sentinel2a": { + "band": "B3", + "bandwidth": 36.0, + "name": "Green", + "platform": "Sentinel-2A", + "wavelength": 559.8 + }, + "sentinel2b": { + "band": "B3", + "bandwidth": 36.0, + "name": "Green", + "platform": "Sentinel-2B", + "wavelength": 559.0 + }, + "wv2": { + "band": "B3", + "bandwidth": 70.0, + "name": "Green", + "platform": "WorldView-2", + "wavelength": 545.0 + }, + "wv3": { + "band": "B3", + "bandwidth": 70.0, + "name": "Green", + "platform": "WorldView-3", + "wavelength": 545.0 + } + }, + "short_name": "G" + }, + "G1": { + "common_name": "green", + "long_name": "Green 1", + "max_wavelength": 550, + "min_wavelength": 510, + "platforms": { + "modis": { + "band": "B11", + "bandwidth": 10.0, + "name": "Green", + "platform": "Terra/Aqua: MODIS", + "wavelength": 531.0 + }, + "planetscope": { + "band": "B3", + "bandwidth": 36.0, + "name": "Green", + "platform": "PlanetScope", + "wavelength": 531.0 + } + }, + "short_name": "G1" + }, + "N": { + "common_name": "nir", + "long_name": 
"Near-Infrared (NIR)", + "max_wavelength": 900, + "min_wavelength": 760, + "platforms": { + "landsat4": { + "band": "B4", + "bandwidth": 140.0, + "name": "Near-Infrared (NIR)", + "platform": "Landsat 4", + "wavelength": 830.0 + }, + "landsat5": { + "band": "B4", + "bandwidth": 140.0, + "name": "Near-Infrared (NIR)", + "platform": "Landsat 5", + "wavelength": 830.0 + }, + "landsat7": { + "band": "B4", + "bandwidth": 130.0, + "name": "Near-Infrared (NIR)", + "platform": "Landsat 7", + "wavelength": 835.0 + }, + "landsat8": { + "band": "B5", + "bandwidth": 30.0, + "name": "Near-Infrared (NIR)", + "platform": "Landsat 8", + "wavelength": 865.0 + }, + "landsat9": { + "band": "B5", + "bandwidth": 30.0, + "name": "Near-Infrared (NIR)", + "platform": "Landsat 9", + "wavelength": 865.0 + }, + "modis": { + "band": "B2", + "bandwidth": 35.0, + "name": "Near-Infrared (NIR)", + "platform": "Terra/Aqua: MODIS", + "wavelength": 858.5 + }, + "planetscope": { + "band": "B8", + "bandwidth": 40.0, + "name": "Near-Infrared (NIR)", + "platform": "PlanetScope", + "wavelength": 865.0 + }, + "sentinel2a": { + "band": "B8", + "bandwidth": 106.0, + "name": "Near-Infrared (NIR)", + "platform": "Sentinel-2A", + "wavelength": 832.8 + }, + "sentinel2b": { + "band": "B8", + "bandwidth": 106.0, + "name": "Near-Infrared (NIR)", + "platform": "Sentinel-2B", + "wavelength": 833.0 + }, + "wv2": { + "band": "B7", + "bandwidth": 125.0, + "name": "Near-IR1", + "platform": "WorldView-2", + "wavelength": 832.5 + }, + "wv3": { + "band": "B7", + "bandwidth": 125.0, + "name": "Near-IR1", + "platform": "WorldView-3", + "wavelength": 832.5 + } + }, + "short_name": "N" + }, + "N2": { + "common_name": "nir08", + "long_name": "Near-Infrared (NIR) 2", + "max_wavelength": 880, + "min_wavelength": 850, + "platforms": { + "sentinel2a": { + "band": "B8A", + "bandwidth": 21.0, + "name": "Near-Infrared (NIR) 2 (Red Edge 4 in Google Earth Engine)", + "platform": "Sentinel-2A", + "wavelength": 864.7 + }, + "sentinel2b": { 
+ "band": "B8A", + "bandwidth": 21.0, + "name": "Near-Infrared (NIR) 2 (Red Edge 4 in Google Earth Engine)", + "platform": "Sentinel-2B", + "wavelength": 864.0 + } + }, + "short_name": "N2" + }, + "R": { + "common_name": "red", + "long_name": "Red", + "max_wavelength": 690, + "min_wavelength": 620, + "platforms": { + "landsat4": { + "band": "B3", + "bandwidth": 60.0, + "name": "Red", + "platform": "Landsat 4", + "wavelength": 660.0 + }, + "landsat5": { + "band": "B3", + "bandwidth": 60.0, + "name": "Red", + "platform": "Landsat 5", + "wavelength": 660.0 + }, + "landsat7": { + "band": "B3", + "bandwidth": 60.0, + "name": "Red", + "platform": "Landsat 7", + "wavelength": 660.0 + }, + "landsat8": { + "band": "B4", + "bandwidth": 30.0, + "name": "Red", + "platform": "Landsat 8", + "wavelength": 655.0 + }, + "landsat9": { + "band": "B4", + "bandwidth": 30.0, + "name": "Red", + "platform": "Landsat 9", + "wavelength": 655.0 + }, + "modis": { + "band": "B1", + "bandwidth": 50.0, + "name": "Red", + "platform": "Terra/Aqua: MODIS", + "wavelength": 645.0 + }, + "planetscope": { + "band": "B6", + "bandwidth": 30.0, + "name": "Red", + "platform": "PlanetScope", + "wavelength": 665.0 + }, + "sentinel2a": { + "band": "B4", + "bandwidth": 31.0, + "name": "Red", + "platform": "Sentinel-2A", + "wavelength": 664.6 + }, + "sentinel2b": { + "band": "B4", + "bandwidth": 31.0, + "name": "Red", + "platform": "Sentinel-2B", + "wavelength": 665.0 + }, + "wv2": { + "band": "B5", + "bandwidth": 60.0, + "name": "Red", + "platform": "WorldView-2", + "wavelength": 660.0 + }, + "wv3": { + "band": "B5", + "bandwidth": 60.0, + "name": "Red", + "platform": "WorldView-3", + "wavelength": 660.0 + } + }, + "short_name": "R" + }, + "RE1": { + "common_name": "rededge", + "long_name": "Red Edge 1", + "max_wavelength": 715, + "min_wavelength": 695, + "platforms": { + "planetscope": { + "band": "B7", + "bandwidth": 16.0, + "name": "Red Edge", + "platform": "PlanetScope", + "wavelength": 705.0 + }, + 
"sentinel2a": { + "band": "B5", + "bandwidth": 15.0, + "name": "Red Edge 1", + "platform": "Sentinel-2A", + "wavelength": 704.1 + }, + "sentinel2b": { + "band": "B5", + "bandwidth": 15.0, + "name": "Red Edge 1", + "platform": "Sentinel-2B", + "wavelength": 703.8 + } + }, + "short_name": "RE1" + }, + "RE2": { + "common_name": "rededge", + "long_name": "Red Edge 2", + "max_wavelength": 750, + "min_wavelength": 730, + "platforms": { + "sentinel2a": { + "band": "B6", + "bandwidth": 15.0, + "name": "Red Edge 2", + "platform": "Sentinel-2A", + "wavelength": 740.5 + }, + "sentinel2b": { + "band": "B6", + "bandwidth": 15.0, + "name": "Red Edge 2", + "platform": "Sentinel-2B", + "wavelength": 739.1 + } + }, + "short_name": "RE2" + }, + "RE3": { + "common_name": "rededge", + "long_name": "Red Edge 3", + "max_wavelength": 795, + "min_wavelength": 765, + "platforms": { + "sentinel2a": { + "band": "B7", + "bandwidth": 20.0, + "name": "Red Edge 3", + "platform": "Sentinel-2A", + "wavelength": 782.8 + }, + "sentinel2b": { + "band": "B7", + "bandwidth": 20.0, + "name": "Red Edge 3", + "platform": "Sentinel-2B", + "wavelength": 779.7 + } + }, + "short_name": "RE3" + }, + "S1": { + "common_name": "swir16", + "long_name": "Short-wave Infrared (SWIR) 1", + "max_wavelength": 1750, + "min_wavelength": 1550, + "platforms": { + "landsat4": { + "band": "B5", + "bandwidth": 200.0, + "name": "Short-wave Infrared (SWIR) 1", + "platform": "Landsat 4", + "wavelength": 1650.0 + }, + "landsat5": { + "band": "B5", + "bandwidth": 200.0, + "name": "Short-wave Infrared (SWIR) 1", + "platform": "Landsat 5", + "wavelength": 1650.0 + }, + "landsat7": { + "band": "B5", + "bandwidth": 200.0, + "name": "Short-wave Infrared (SWIR) 1", + "platform": "Landsat 7", + "wavelength": 1650.0 + }, + "landsat8": { + "band": "B6", + "bandwidth": 80.0, + "name": "Short-wave Infrared (SWIR) 1", + "platform": "Landsat 8", + "wavelength": 1610.0 + }, + "landsat9": { + "band": "B6", + "bandwidth": 80.0, + "name": 
"Short-wave Infrared (SWIR) 1", + "platform": "Landsat 9", + "wavelength": 1610.0 + }, + "modis": { + "band": "B6", + "bandwidth": 24.0, + "name": "Short-wave Infrared (SWIR) 1", + "platform": "Terra/Aqua: MODIS", + "wavelength": 1640.0 + }, + "sentinel2a": { + "band": "B11", + "bandwidth": 91.0, + "name": "Short-wave Infrared (SWIR) 1", + "platform": "Sentinel-2A", + "wavelength": 1613.7 + }, + "sentinel2b": { + "band": "B11", + "bandwidth": 94.0, + "name": "Short-wave Infrared (SWIR) 1", + "platform": "Sentinel-2B", + "wavelength": 1610.4 + } + }, + "short_name": "S1" + }, + "S2": { + "common_name": "swir22", + "long_name": "Short-wave Infrared (SWIR) 2", + "max_wavelength": 2350, + "min_wavelength": 2080, + "platforms": { + "landsat4": { + "band": "B7", + "bandwidth": 270.0, + "name": "Short-wave Infrared (SWIR) 2", + "platform": "Landsat 4", + "wavelength": 2215.0 + }, + "landsat5": { + "band": "B7", + "bandwidth": 270.0, + "name": "Short-wave Infrared (SWIR) 2", + "platform": "Landsat 5", + "wavelength": 2215.0 + }, + "landsat7": { + "band": "B7", + "bandwidth": 260.0, + "name": "Short-wave Infrared (SWIR) 2", + "platform": "Landsat 7", + "wavelength": 2220.0 + }, + "landsat8": { + "band": "B7", + "bandwidth": 180.0, + "name": "Short-wave Infrared (SWIR) 2", + "platform": "Landsat 8", + "wavelength": 2200.0 + }, + "landsat9": { + "band": "B7", + "bandwidth": 180.0, + "name": "Short-wave Infrared (SWIR) 2", + "platform": "Landsat 9", + "wavelength": 2200.0 + }, + "modis": { + "band": "B7", + "bandwidth": 50.0, + "name": "Short-wave Infrared (SWIR) 2", + "platform": "Terra/Aqua: MODIS", + "wavelength": 2130.0 + }, + "sentinel2a": { + "band": "B12", + "bandwidth": 175.0, + "name": "Short-wave Infrared (SWIR) 2", + "platform": "Sentinel-2A", + "wavelength": 2202.4 + }, + "sentinel2b": { + "band": "B12", + "bandwidth": 185.0, + "name": "Short-wave Infrared (SWIR) 2", + "platform": "Sentinel-2B", + "wavelength": 2185.7 + } + }, + "short_name": "S2" + }, + "T": { + 
"common_name": "lwir", + "long_name": "Thermal Infrared", + "max_wavelength": 12500, + "min_wavelength": 10400, + "platforms": { + "landsat4": { + "band": "B6", + "bandwidth": 2100.0, + "name": "Thermal Infrared", + "platform": "Landsat 4", + "wavelength": 11450.0 + }, + "landsat5": { + "band": "B6", + "bandwidth": 2100.0, + "name": "Thermal Infrared", + "platform": "Landsat 5", + "wavelength": 11450.0 + }, + "landsat7": { + "band": "B6", + "bandwidth": 2100.0, + "name": "Thermal Infrared", + "platform": "Landsat 7", + "wavelength": 11450.0 + } + }, + "short_name": "T" + }, + "T1": { + "common_name": "lwir11", + "long_name": "Thermal Infrared 1", + "max_wavelength": 11190, + "min_wavelength": 10600, + "platforms": { + "landsat8": { + "band": "B10", + "bandwidth": 590.0, + "name": "Thermal Infrared 1", + "platform": "Landsat 8", + "wavelength": 10895.0 + }, + "landsat9": { + "band": "B10", + "bandwidth": 590.0, + "name": "Thermal Infrared 1", + "platform": "Landsat 9", + "wavelength": 10895.0 + } + }, + "short_name": "T1" + }, + "T2": { + "common_name": "lwir12", + "long_name": "Thermal Infrared 2", + "max_wavelength": 12510, + "min_wavelength": 11500, + "platforms": { + "landsat8": { + "band": "B11", + "bandwidth": 1010.0, + "name": "Thermal Infrared 2", + "platform": "Landsat 8", + "wavelength": 12005.0 + }, + "landsat9": { + "band": "B11", + "bandwidth": 1010.0, + "name": "Thermal Infrared 2", + "platform": "Landsat 9", + "wavelength": 12005.0 + } + }, + "short_name": "T2" + }, + "WV": { + "common_name": "nir09", + "long_name": "Water Vapour", + "max_wavelength": 960, + "min_wavelength": 930, + "platforms": { + "sentinel2a": { + "band": "B9", + "bandwidth": 20.0, + "name": "Water Vapour", + "platform": "Sentinel-2A", + "wavelength": 945.1 + }, + "sentinel2b": { + "band": "B9", + "bandwidth": 21.0, + "name": "Water Vapour", + "platform": "Sentinel-2B", + "wavelength": 943.2 + } + }, + "short_name": "WV" + }, + "Y": { + "common_name": "yellow", + "long_name": 
"Yellow", + "max_wavelength": 625, + "min_wavelength": 585, + "platforms": { + "planetscope": { + "band": "B5", + "bandwidth": 20.0, + "name": "Yellow", + "platform": "PlanetScope", + "wavelength": 610.0 + }, + "wv2": { + "band": "B4", + "bandwidth": 40.0, + "name": "Yellow", + "platform": "WorldView-2", + "wavelength": 605.0 + }, + "wv3": { + "band": "B4", + "bandwidth": 40.0, + "name": "Yellow", + "platform": "WorldView-3", + "wavelength": 605.0 + } + }, + "short_name": "Y" + } +} diff --git a/lib/openeo/extra/spectral_indices/resources/awesome-spectral-indices/constants.json b/lib/openeo/extra/spectral_indices/resources/awesome-spectral-indices/constants.json new file mode 100644 index 000000000..3aa7cd7b5 --- /dev/null +++ b/lib/openeo/extra/spectral_indices/resources/awesome-spectral-indices/constants.json @@ -0,0 +1,107 @@ +{ + "C1": { + "default": 6.0, + "description": "Coefficient 1 for the aerosol resistance term", + "short_name": "C1" + }, + "C2": { + "default": 7.5, + "description": "Coefficient 2 for the aerosol resistance term", + "short_name": "C2" + }, + "L": { + "default": 1.0, + "description": "Canopy background adjustment", + "short_name": "L" + }, + "PAR": { + "default": null, + "description": "Photosynthetically Active Radiation", + "short_name": "PAR" + }, + "alpha": { + "default": 0.1, + "description": "Weighting coefficient used for WDRVI", + "short_name": "alpha" + }, + "beta": { + "default": 0.05, + "description": "Calibration parameter used for NDSInw", + "short_name": "beta" + }, + "c": { + "default": 1.0, + "description": "Trade-off parameter in the polynomial kernel", + "short_name": "c" + }, + "cexp": { + "default": 1.16, + "description": "Exponent used for OCVI", + "short_name": "cexp" + }, + "fdelta": { + "default": 0.581, + "description": "Adjustment factor used for SEVI", + "short_name": "fdelta" + }, + "g": { + "default": 2.5, + "description": "Gain factor", + "short_name": "g" + }, + "gamma": { + "default": 1.0, + "description": 
"Weighting coefficient used for ARVI", + "short_name": "gamma" + }, + "k": { + "default": 0.0, + "description": "Slope parameter by soil used for NIRvH2", + "short_name": "k" + }, + "lambdaG": { + "default": null, + "description": "Green wavelength (nm) used for NDGI", + "short_name": "lambdaG" + }, + "lambdaN": { + "default": null, + "description": "NIR wavelength (nm) used for NIRvH2 and NDGI", + "short_name": "lambdaN" + }, + "lambdaR": { + "default": null, + "description": "Red wavelength (nm) used for NIRvH2 and NDGI", + "short_name": "lambdaR" + }, + "nexp": { + "default": 2.0, + "description": "Exponent used for GDVI", + "short_name": "nexp" + }, + "omega": { + "default": 2.0, + "description": "Weighting coefficient used for MBWI", + "short_name": "omega" + }, + "p": { + "default": 2.0, + "description": "Kernel degree in the polynomial kernel", + "short_name": "p" + }, + "sigma": { + "default": 0.5, + "description": "Length-scale parameter in the RBF kernel", + "short_name": "sigma" + }, + "sla": { + "default": 1.0, + "description": "Soil line slope", + "short_name": "sla" + }, + "slb": { + "default": 0.0, + "description": "Soil line intercept", + "short_name": "slb" + } +} diff --git a/lib/openeo/extra/spectral_indices/resources/awesome-spectral-indices/spectral-indices-dict.json b/lib/openeo/extra/spectral_indices/resources/awesome-spectral-indices/spectral-indices-dict.json new file mode 100644 index 000000000..04fbce636 --- /dev/null +++ b/lib/openeo/extra/spectral_indices/resources/awesome-spectral-indices/spectral-indices-dict.json @@ -0,0 +1,4616 @@ +{ + "SpectralIndices": { + "AFRI1600": { + "application_domain": "vegetation", + "bands": [ + "N", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-11-17", + "formula": "(N - 0.66 * S1) / (N + 0.66 * S1)", + "long_name": "Aerosol Free Vegetation Index (1600 nm)", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + 
"reference": "https://doi.org/10.1016/S0034-4257(01)00190-0", + "short_name": "AFRI1600" + }, + "AFRI2100": { + "application_domain": "vegetation", + "bands": [ + "N", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-11-17", + "formula": "(N - 0.5 * S2) / (N + 0.5 * S2)", + "long_name": "Aerosol Free Vegetation Index (2100 nm)", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/S0034-4257(01)00190-0", + "short_name": "AFRI2100" + }, + "ANDWI": { + "application_domain": "water", + "bands": [ + "B", + "G", + "R", + "N", + "S1", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-09-22", + "formula": "(B + G + R - N - S1 - S2)/(B + G + R + N + S1 + S2)", + "long_name": "Augmented Normalized Difference Water Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/j.envsoft.2021.105030", + "short_name": "ANDWI" + }, + "ARI": { + "application_domain": "vegetation", + "bands": [ + "G", + "RE1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-20", + "formula": "(1 / G) - (1 / RE1)", + "long_name": "Anthocyanin Reflectance Index", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1562/0031-8655(2001)074%3C0038:OPANEO%3E2.0.CO;2", + "short_name": "ARI" + }, + "ARI2": { + "application_domain": "vegetation", + "bands": [ + "N", + "G", + "RE1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "N * ((1 / G) - (1 / RE1))", + "long_name": "Anthocyanin Reflectance Index 2", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1562/0031-8655(2001)074%3C0038:OPANEO%3E2.0.CO;2", + "short_name": "ARI2" + }, + "ARVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R", + "gamma", + "B" + ], + 
"contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-11", + "formula": "(N - (R - gamma * (R - B))) / (N + (R - gamma * (R - B)))", + "long_name": "Atmospherically Resistant Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1109/36.134076", + "short_name": "ARVI" + }, + "ATSAVI": { + "application_domain": "vegetation", + "bands": [ + "sla", + "N", + "R", + "slb" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "sla * (N - sla * R - slb) / (sla * N + R - sla * slb + 0.08 * (1 + sla ** 2.0))", + "long_name": "Adjusted Transformed Soil-Adjusted Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/0034-4257(91)90009-U", + "short_name": "ATSAVI" + }, + "AVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(N * (1.0 - R) * (N - R)) ** (1/3)", + "long_name": "Advanced Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.465.8749&rep=rep1&type=pdf", + "short_name": "AVI" + }, + "AWEInsh": { + "application_domain": "water", + "bands": [ + "G", + "S1", + "N", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-18", + "formula": "4.0 * (G - S1) - 0.25 * N + 2.75 * S2", + "long_name": "Automated Water Extraction Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/j.rse.2013.08.029", + "short_name": "AWEInsh" + }, + "AWEIsh": { + "application_domain": "water", 
+ "bands": [ + "B", + "G", + "N", + "S1", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-18", + "formula": "B + 2.5 * G - 1.5 * (N + S1) - 0.25 * S2", + "long_name": "Automated Water Extraction Index with Shadows Elimination", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/j.rse.2013.08.029", + "short_name": "AWEIsh" + }, + "BAI": { + "application_domain": "burn", + "bands": [ + "R", + "N" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "1.0 / ((0.1 - R) ** 2.0 + (0.06 - N) ** 2.0)", + "long_name": "Burned Area Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://digital.csic.es/bitstream/10261/6426/1/Martin_Isabel_Serie_Geografica.pdf", + "short_name": "BAI" + }, + "BAIM": { + "application_domain": "burn", + "bands": [ + "N", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-20", + "formula": "1.0/((0.05 - N) ** 2.0) + ((0.2 - S2) ** 2.0)", + "long_name": "Burned Area Index adapted to MODIS", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/j.foreco.2006.08.248", + "short_name": "BAIM" + }, + "BAIS2": { + "application_domain": "burn", + "bands": [ + "RE2", + "RE3", + "N2", + "R", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(1.0 - ((RE2 * RE3 * N2) / R) ** 0.5) * (((S2 - N2)/(S2 + N2) ** 0.5) + 1.0)", + "long_name": "Burned Area Index for Sentinel 2", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.3390/ecrs-2-05177", + "short_name": "BAIS2" + }, + "BCC": { + "application_domain": "vegetation", + "bands": [ + "B", + "R", + "G" + ], + "contributor": 
"https://github.com/davemlz", + "date_of_addition": "2022-01-17", + "formula": "B / (R + G + B)", + "long_name": "Blue Chromatic Coordinate", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/0034-4257(87)90088-5", + "short_name": "BCC" + }, + "BI": { + "application_domain": "soil", + "bands": [ + "S1", + "R", + "N", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "((S1 + R) - (N + B))/((S1 + R) + (N + B))", + "long_name": "Bare Soil Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.465.8749&rep=rep1&type=pdf", + "short_name": "BI" + }, + "BITM": { + "application_domain": "soil", + "bands": [ + "B", + "G", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-11-20", + "formula": "(((B**2.0)+(G**2.0)+(R**2.0))/3.0)**0.5", + "long_name": "Landsat TM-based Brightness Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/S0034-4257(98)00030-3", + "short_name": "BITM" + }, + "BIXS": { + "application_domain": "soil", + "bands": [ + "G", + "R" + ], + "contributor": "https://github.com/remi-braun", + "date_of_addition": "2022-11-20", + "formula": "(((G**2.0)+(R**2.0))/2.0)**0.5", + "long_name": "SPOT HRV XS-based Brightness Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/S0034-4257(98)00030-3", + "short_name": "BIXS" + }, + "BLFEI": { + "application_domain": "urban", + "bands": [ + "G", + "R", + "S2", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-02-09", + "formula": 
"(((G+R+S2)/3.0)-S1)/(((G+R+S2)/3.0)+S1)", + "long_name": "Built-Up Land Features Extraction Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1080/10106049.2018.1497094", + "short_name": "BLFEI" + }, + "BNDVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "B" + ], + "contributor": "https://github.com/MATRIX4284", + "date_of_addition": "2021-04-07", + "formula": "(N - B)/(N + B)", + "long_name": "Blue Normalized Difference Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/S1672-6308(07)60027-4", + "short_name": "BNDVI" + }, + "BRBA": { + "application_domain": "urban", + "bands": [ + "R", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-09-22", + "formula": "R/S1", + "long_name": "Band Ratio for Built-up Area", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://www.omicsonline.org/scientific-reports/JGRS-SR136.pdf", + "short_name": "BRBA" + }, + "BWDRVI": { + "application_domain": "vegetation", + "bands": [ + "alpha", + "N", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-20", + "formula": "(alpha * N - B) / (alpha * N + B)", + "long_name": "Blue Wide Dynamic Range Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.2135/cropsci2007.01.0031", + "short_name": "BWDRVI" + }, + "BaI": { + "application_domain": "soil", + "bands": [ + "R", + "S1", + "N" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-18", + "formula": "R + S1 - N", + "long_name": "Bareness Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + 
"Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1109/IGARSS.2005.1525743", + "short_name": "BaI" + }, + "CCI": { + "application_domain": "vegetation", + "bands": [ + "G1", + "R" + ], + "contributor": "https://github.com/joanvlasschaert", + "date_of_addition": "2023-03-12", + "formula": "(G1 - R)/(G1 + R)", + "long_name": "Chlorophyll Carotenoid Index", + "platforms": [ + "MODIS" + ], + "reference": "https://doi.org/10.1073/pnas.1606162113", + "short_name": "CCI" + }, + "CIG": { + "application_domain": "vegetation", + "bands": [ + "N", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(N / G) - 1.0", + "long_name": "Chlorophyll Index Green", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1078/0176-1617-00887", + "short_name": "CIG" + }, + "CIRE": { + "application_domain": "vegetation", + "bands": [ + "N", + "RE1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-13", + "formula": "(N / RE1) - 1", + "long_name": "Chlorophyll Index Red Edge", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1078/0176-1617-00887", + "short_name": "CIRE" + }, + "CSI": { + "application_domain": "burn", + "bands": [ + "N", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-19", + "formula": "N/S2", + "long_name": "Char Soil Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/j.rse.2005.04.014", + "short_name": "CSI" + }, + "CSIT": { + "application_domain": "burn", + "bands": [ + "N", + "S2", + "T" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "N / (S2 * T / 10000.0)", + "long_name": "Char Soil Index Thermal", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + 
"reference": "https://doi.org/10.1080/01431160600954704", + "short_name": "CSIT" + }, + "CVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(N * R) / (G ** 2.0)", + "long_name": "Chlorophyll Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1007/s11119-010-9204-3", + "short_name": "CVI" + }, + "DBI": { + "application_domain": "urban", + "bands": [ + "B", + "T1", + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-18", + "formula": "((B - T1)/(B + T1)) - ((N - R)/(N + R))", + "long_name": "Dry Built-Up Index", + "platforms": [ + "Landsat-OLI" + ], + "reference": "https://doi.org/10.3390/land7030081", + "short_name": "DBI" + }, + "DBSI": { + "application_domain": "soil", + "bands": [ + "S1", + "G", + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-18", + "formula": "((S1 - G)/(S1 + G)) - ((N - R)/(N + R))", + "long_name": "Dry Bareness Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.3390/land7030081", + "short_name": "DBSI" + }, + "DPDD": { + "application_domain": "radar", + "bands": [ + "VV", + "VH" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-19", + "formula": "(VV + VH)/2.0 ** 0.5", + "long_name": "Dual-Pol Diagonal Distance", + "platforms": [ + "Sentinel-1 (Dual Polarisation VV-VH)" + ], + "reference": "https://doi.org/10.1016/j.rse.2018.09.003", + "short_name": "DPDD" + }, + "DSI": { + "application_domain": "vegetation", + "bands": [ + "S1", + "N" + ], + "contributor": "https://github.com/remi-braun", + "date_of_addition": "2022-10-26", + "formula": "S1/N", + "long_name": "Drought Stress Index", + 
"platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://www.asprs.org/wp-content/uploads/pers/1999journal/apr/1999_apr_495-501.pdf", + "short_name": "DSI" + }, + "DSWI1": { + "application_domain": "vegetation", + "bands": [ + "N", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-10-29", + "formula": "N/S1", + "long_name": "Disease-Water Stress Index 1", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1080/01431160310001618031", + "short_name": "DSWI1" + }, + "DSWI2": { + "application_domain": "vegetation", + "bands": [ + "S1", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-10-29", + "formula": "S1/G", + "long_name": "Disease-Water Stress Index 2", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1080/01431160310001618031", + "short_name": "DSWI2" + }, + "DSWI3": { + "application_domain": "vegetation", + "bands": [ + "S1", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-10-29", + "formula": "S1/R", + "long_name": "Disease-Water Stress Index 3", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1080/01431160310001618031", + "short_name": "DSWI3" + }, + "DSWI4": { + "application_domain": "vegetation", + "bands": [ + "G", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-10-29", + "formula": "G/R", + "long_name": "Disease-Water Stress Index 4", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1080/01431160310001618031", + "short_name": "DSWI4" + }, + "DSWI5": { + "application_domain": "vegetation", + 
"bands": [ + "N", + "G", + "S1", + "R" + ], + "contributor": "https://github.com/remi-braun", + "date_of_addition": "2022-10-26", + "formula": "(N + G)/(S1 + R)", + "long_name": "Disease-Water Stress Index 5", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1080/01431160310001618031", + "short_name": "DSWI5" + }, + "DVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "N - R", + "long_name": "Difference Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/0034-4257(94)00114-3", + "short_name": "DVI" + }, + "DVIplus": { + "application_domain": "vegetation", + "bands": [ + "lambdaN", + "lambdaR", + "lambdaG", + "G", + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-01-20", + "formula": "((lambdaN - lambdaR)/(lambdaN - lambdaG)) * G + (1.0 - ((lambdaN - lambdaR)/(lambdaN - lambdaG))) * N - R", + "long_name": "Difference Vegetation Index Plus", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.rse.2019.03.028", + "short_name": "DVIplus" + }, + "DpRVIHH": { + "application_domain": "radar", + "bands": [ + "HV", + "HH" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-12-25", + "formula": "(4.0 * HV)/(HH + HV)", + "long_name": "Dual-Polarized Radar Vegetation Index HH", + "platforms": [ + "Sentinel-1 (Dual Polarisation HH-HV)" + ], + "reference": "https://www.tandfonline.com/doi/abs/10.5589/m12-043", + "short_name": "DpRVIHH" + }, + "DpRVIVV": { + "application_domain": "radar", + "bands": [ + "VH", + "VV" + ], + "contributor": "https://github.com/davemlz", + 
"date_of_addition": "2021-12-25", + "formula": "(4.0 * VH)/(VV + VH)", + "long_name": "Dual-Polarized Radar Vegetation Index VV", + "platforms": [ + "Sentinel-1 (Dual Polarisation VV-VH)" + ], + "reference": "https://doi.org/10.3390/app9040655", + "short_name": "DpRVIVV" + }, + "EBBI": { + "application_domain": "urban", + "bands": [ + "S1", + "N", + "T" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-17", + "formula": "(S1 - N) / (10.0 * ((S1 + T) ** 0.5))", + "long_name": "Enhanced Built-Up and Bareness Index", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://doi.org/10.3390/rs4102957", + "short_name": "EBBI" + }, + "EMBI": { + "application_domain": "soil", + "bands": [ + "S1", + "S2", + "N", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-18", + "formula": "((((S1 - S2 - N)/(S1 + S2 + N)) + 0.5) - ((G - S1)/(G + S1)) - 0.5)/((((S1 - S2 - N)/(S1 + S2 + N)) + 0.5) + ((G - S1)/(G + S1)) + 1.5)", + "long_name": "Enhanced Modified Bare Soil Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/j.jag.2022.102703", + "short_name": "EMBI" + }, + "EVI": { + "application_domain": "vegetation", + "bands": [ + "g", + "N", + "R", + "C1", + "C2", + "B", + "L" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "g * (N - R) / (N + C1 * R - C2 * B + L)", + "long_name": "Enhanced Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/S0034-4257(96)00112-5", + "short_name": "EVI" + }, + "EVI2": { + "application_domain": "vegetation", + "bands": [ + "g", + "N", + "R", + "L" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "g * (N - R) / (N + 2.4 * R + L)", + 
"long_name": "Two-Band Enhanced Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.rse.2008.06.006", + "short_name": "EVI2" + }, + "ExG": { + "application_domain": "vegetation", + "bands": [ + "G", + "R", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "2 * G - R - B", + "long_name": "Excess Green Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.13031/2013.27838", + "short_name": "ExG" + }, + "ExGR": { + "application_domain": "vegetation", + "bands": [ + "G", + "R", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(2.0 * G - R - B) - (1.3 * R - G)", + "long_name": "ExG - ExR Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.compag.2008.03.009", + "short_name": "ExGR" + }, + "ExR": { + "application_domain": "vegetation", + "bands": [ + "R", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "1.3 * R - G", + "long_name": "Excess Red Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1117/12.336896", + "short_name": "ExR" + }, + "FCVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R", + "G", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-01-20", + "formula": "N - ((R + G + B)/3.0)", + "long_name": "Fluorescence Correction Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": 
"https://doi.org/10.1016/j.rse.2020.111676", + "short_name": "FCVI" + }, + "GARI": { + "application_domain": "vegetation", + "bands": [ + "N", + "G", + "B", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(N - (G - (B - R))) / (N - (G + (B - R)))", + "long_name": "Green Atmospherically Resistant Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/S0034-4257(96)00072-7", + "short_name": "GARI" + }, + "GBNDVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "G", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(N - (G + B))/(N + (G + B))", + "long_name": "Green-Blue Normalized Difference Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/S1672-6308(07)60027-4", + "short_name": "GBNDVI" + }, + "GCC": { + "application_domain": "vegetation", + "bands": [ + "G", + "R", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-01-17", + "formula": "G / (R + G + B)", + "long_name": "Green Chromatic Coordinate", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/0034-4257(87)90088-5", + "short_name": "GCC" + }, + "GDVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "nexp", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "((N ** nexp) - (R ** nexp)) / ((N ** nexp) + (R ** nexp))", + "long_name": "Generalized Difference Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": 
"https://doi.org/10.3390/rs6021211", + "short_name": "GDVI" + }, + "GEMI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "((2.0*((N ** 2.0)-(R ** 2.0)) + 1.5*N + 0.5*R)/(N + R + 0.5))*(1.0 - 0.25*((2.0 * ((N ** 2.0) - (R ** 2)) + 1.5 * N + 0.5 * R)/(N + R + 0.5)))-((R - 0.125)/(1 - R))", + "long_name": "Global Environment Monitoring Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "http://dx.doi.org/10.1007/bf00031911", + "short_name": "GEMI" + }, + "GLI": { + "application_domain": "vegetation", + "bands": [ + "G", + "R", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(2.0 * G - R - B) / (2.0 * G + R + B)", + "long_name": "Green Leaf Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "http://dx.doi.org/10.1080/10106040108542184", + "short_name": "GLI" + }, + "GM1": { + "application_domain": "vegetation", + "bands": [ + "RE2", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "RE2/G", + "long_name": "Gitelson and Merzlyak Index 1", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/S0176-1617(96)80284-7", + "short_name": "GM1" + }, + "GM2": { + "application_domain": "vegetation", + "bands": [ + "RE2", + "RE1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "RE2/RE1", + "long_name": "Gitelson and Merzlyak Index 2", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/S0176-1617(96)80284-7", + "short_name": "GM2" + }, + "GNDVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "G" + ], + "contributor": "https://github.com/davemlz", + 
"date_of_addition": "2021-04-07", + "formula": "(N - G)/(N + G)", + "long_name": "Green Normalized Difference Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/S0034-4257(96)00072-7", + "short_name": "GNDVI" + }, + "GOSAVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(N - G) / (N + G + 0.16)", + "long_name": "Green Optimized Soil Adjusted Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.2134/agronj2004.0314", + "short_name": "GOSAVI" + }, + "GRNDVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "G", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(N - (G + R))/(N + (G + R))", + "long_name": "Green-Red Normalized Difference Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/S1672-6308(07)60027-4", + "short_name": "GRNDVI" + }, + "GRVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "N/G", + "long_name": "Green Ratio Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.2134/agronj2004.0314", + "short_name": "GRVI" + }, + "GSAVI": { + "application_domain": "vegetation", + "bands": [ + "L", + "N", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(1.0 + L) * (N - G) / (N + G + L)", + "long_name": "Green Soil 
Adjusted Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.2134/agronj2004.0314", + "short_name": "GSAVI" + }, + "GVMI": { + "application_domain": "vegetation", + "bands": [ + "N", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "((N + 0.1) - (S2 + 0.02)) / ((N + 0.1) + (S2 + 0.02))", + "long_name": "Global Vegetation Moisture Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/S0034-4257(02)00037-8", + "short_name": "GVMI" + }, + "IAVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R", + "gamma", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(N - (R - gamma * (B - R)))/(N + (R - gamma * (B - R)))", + "long_name": "New Atmospherically Resistant Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://www.jipb.net/EN/abstract/abstract23925.shtml", + "short_name": "IAVI" + }, + "IBI": { + "application_domain": "urban", + "bands": [ + "S1", + "N", + "R", + "L", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-02-09", + "formula": "(((S1-N)/(S1+N))-(((N-R)*(1.0+L)/(N+R+L))+((G-S1)/(G+S1)))/2.0)/(((S1-N)/(S1+N))+(((N-R)*(1.0+L)/(N+R+L))+((G-S1)/(G+S1)))/2.0)", + "long_name": "Index-Based Built-Up Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1080/01431160802039957", + "short_name": "IBI" + }, + "IKAW": { + "application_domain": "vegetation", + "bands": [ + "R", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(R - B)/(R + B)", + 
"long_name": "Kawashima Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1006/anbo.1997.0544", + "short_name": "IKAW" + }, + "IPVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "N/(N + R)", + "long_name": "Infrared Percentage Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/0034-4257(90)90085-Z", + "short_name": "IPVI" + }, + "IRECI": { + "application_domain": "vegetation", + "bands": [ + "RE3", + "R", + "RE1", + "RE2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-17", + "formula": "(RE3 - R) / (RE1 / RE2)", + "long_name": "Inverted Red-Edge Chlorophyll Index", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/j.isprsjprs.2013.04.007", + "short_name": "IRECI" + }, + "LSWI": { + "application_domain": "water", + "bands": [ + "N", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-20", + "formula": "(N - S1)/(N + S1)", + "long_name": "Land Surface Water Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/j.rse.2003.11.008", + "short_name": "LSWI" + }, + "MBI": { + "application_domain": "soil", + "bands": [ + "S1", + "S2", + "N" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-18", + "formula": "((S1 - S2 - N)/(S1 + S2 + N)) + 0.5", + "long_name": "Modified Bare Soil Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.3390/land10030231", + "short_name": "MBI" + }, + "MBWI": { + 
"application_domain": "water", + "bands": [ + "omega", + "G", + "R", + "N", + "S1", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-01-17", + "formula": "(omega * G) - R - N - S1 - S2", + "long_name": "Multi-Band Water Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/j.jag.2018.01.018", + "short_name": "MBWI" + }, + "MCARI": { + "application_domain": "vegetation", + "bands": [ + "RE1", + "R", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-13", + "formula": "((RE1 - R) - 0.2 * (RE1 - G)) * (RE1 / R)", + "long_name": "Modified Chlorophyll Absorption in Reflectance Index", + "platforms": [ + "Sentinel-2" + ], + "reference": "http://dx.doi.org/10.1016/S0034-4257(00)00113-9", + "short_name": "MCARI" + }, + "MCARI1": { + "application_domain": "vegetation", + "bands": [ + "N", + "R", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "1.2 * (2.5 * (N - R) - 1.3 * (N - G))", + "long_name": "Modified Chlorophyll Absorption in Reflectance Index 1", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.rse.2003.12.013", + "short_name": "MCARI1" + }, + "MCARI2": { + "application_domain": "vegetation", + "bands": [ + "N", + "R", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "(1.5 * (2.5 * (N - R) - 1.3 * (N - G))) / ((((2.0 * N + 1) ** 2) - (6.0 * N - 5 * (R ** 0.5)) - 0.5) ** 0.5)", + "long_name": "Modified Chlorophyll Absorption in Reflectance Index 2", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.rse.2003.12.013", + "short_name": "MCARI2" + }, + "MCARI705": { + 
"application_domain": "vegetation", + "bands": [ + "RE2", + "RE1", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-11-06", + "formula": "((RE2 - RE1) - 0.2 * (RE2 - G)) * (RE2 / RE1)", + "long_name": "Modified Chlorophyll Absorption in Reflectance Index (705 and 750 nm)", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/j.agrformet.2008.03.005", + "short_name": "MCARI705" + }, + "MCARIOSAVI": { + "application_domain": "vegetation", + "bands": [ + "RE1", + "R", + "G", + "N" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-11-06", + "formula": "(((RE1 - R) - 0.2 * (RE1 - G)) * (RE1 / R)) / (1.16 * (N - R) / (N + R + 0.16))", + "long_name": "MCARI/OSAVI Ratio", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/S0034-4257(00)00113-9", + "short_name": "MCARIOSAVI" + }, + "MCARIOSAVI705": { + "application_domain": "vegetation", + "bands": [ + "RE2", + "RE1", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-11-06", + "formula": "(((RE2 - RE1) - 0.2 * (RE2 - G)) * (RE2 / RE1)) / (1.16 * (RE2 - RE1) / (RE2 + RE1 + 0.16))", + "long_name": "MCARI/OSAVI Ratio (705 and 750 nm)", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/j.agrformet.2008.03.005", + "short_name": "MCARIOSAVI705" + }, + "MGRVI": { + "application_domain": "vegetation", + "bands": [ + "G", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "(G ** 2.0 - R ** 2.0) / (G ** 2.0 + R ** 2.0)", + "long_name": "Modified Green Red Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.jag.2015.02.012", + "short_name": "MGRVI" + }, + "MIRBI": { + "application_domain": "burn", + "bands": [ + "S2", + "S1" + ], + "contributor": "https://github.com/davemlz", + 
"date_of_addition": "2022-04-19", + "formula": "10.0 * S2 - 9.8 * S1 + 2.0", + "long_name": "Mid-Infrared Burn Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1080/01431160110053185", + "short_name": "MIRBI" + }, + "MLSWI26": { + "application_domain": "water", + "bands": [ + "N", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-20", + "formula": "(1.0 - N - S1)/(1.0 - N + S1)", + "long_name": "Modified Land Surface Water Index (MODIS Bands 2 and 6)", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.3390/rs71215805", + "short_name": "MLSWI26" + }, + "MLSWI27": { + "application_domain": "water", + "bands": [ + "N", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-20", + "formula": "(1.0 - N - S2)/(1.0 - N + S2)", + "long_name": "Modified Land Surface Water Index (MODIS Bands 2 and 7)", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.3390/rs71215805", + "short_name": "MLSWI27" + }, + "MNDVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(N - S2)/(N + S2)", + "long_name": "Modified Normalized Difference Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1080/014311697216810", + "short_name": "MNDVI" + }, + "MNDWI": { + "application_domain": "water", + "bands": [ + "G", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(G - S1) / (G + S1)", + "long_name": "Modified Normalized Difference Water Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", 
+ "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1080/01431160600589179", + "short_name": "MNDWI" + }, + "MNLI": { + "application_domain": "vegetation", + "bands": [ + "L", + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-11", + "formula": "(1 + L)*((N ** 2) - R)/((N ** 2) + R + L)", + "long_name": "Modified Non-Linear Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1109/TGRS.2003.812910", + "short_name": "MNLI" + }, + "MRBVI": { + "application_domain": "vegetation", + "bands": [ + "R", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(R ** 2.0 - B ** 2.0)/(R ** 2.0 + B ** 2.0)", + "long_name": "Modified Red Blue Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.3390/s20185055", + "short_name": "MRBVI" + }, + "MSAVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-13", + "formula": "0.5 * (2.0 * N + 1 - (((2 * N + 1) ** 2) - 8 * (N - R)) ** 0.5)", + "long_name": "Modified Soil-Adjusted Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/0034-4257(94)90134-1", + "short_name": "MSAVI" + }, + "MSI": { + "application_domain": "vegetation", + "bands": [ + "S1", + "N" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "S1/N", + "long_name": "Moisture Stress Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": 
"https://doi.org/10.1016/0034-4257(89)90046-1", + "short_name": "MSI" + }, + "MSR": { + "application_domain": "vegetation", + "bands": [ + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "(N / R - 1) / ((N / R + 1) ** 0.5)", + "long_name": "Modified Simple Ratio", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1080/07038992.1996.10855178", + "short_name": "MSR" + }, + "MSR705": { + "application_domain": "vegetation", + "bands": [ + "RE2", + "RE1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-11-06", + "formula": "(RE2 / RE1 - 1) / ((RE2 / RE1 + 1) ** 0.5)", + "long_name": "Modified Simple Ratio (705 and 750 nm)", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/j.agrformet.2008.03.005", + "short_name": "MSR705" + }, + "MTCI": { + "application_domain": "vegetation", + "bands": [ + "RE2", + "RE1", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-13", + "formula": "(RE2 - RE1) / (RE1 - R)", + "long_name": "MERIS Terrestrial Chlorophyll Index", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1080/0143116042000274015", + "short_name": "MTCI" + }, + "MTVI1": { + "application_domain": "vegetation", + "bands": [ + "N", + "G", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "1.2 * (1.2 * (N - G) - 2.5 * (R - G))", + "long_name": "Modified Triangular Vegetation Index 1", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.rse.2003.12.013", + "short_name": "MTVI1" + }, + "MTVI2": { + "application_domain": "vegetation", + "bands": [ + "N", + "G", + "R" + ], + "contributor": "https://github.com/davemlz", + 
"date_of_addition": "2021-05-14", + "formula": "(1.5 * (1.2 * (N - G) - 2.5 * (R - G))) / ((((2.0 * N + 1) ** 2) - (6.0 * N - 5 * (R ** 0.5)) - 0.5) ** 0.5)", + "long_name": "Modified Triangular Vegetation Index 2", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.rse.2003.12.013", + "short_name": "MTVI2" + }, + "MuWIR": { + "application_domain": "water", + "bands": [ + "B", + "G", + "N", + "S2", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-09", + "formula": "-4.0 * ((B - G)/(B + G)) + 2.0 * ((G - N)/(G + N)) + 2.0 * ((G - S2)/(G + S2)) - ((G - S1)/(G + S1))", + "long_name": "Revised Multi-Spectral Water Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.3390/rs10101643", + "short_name": "MuWIR" + }, + "NBAI": { + "application_domain": "urban", + "bands": [ + "S2", + "S1", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-09-22", + "formula": "((S2 - S1)/G)/((S2 + S1)/G)", + "long_name": "Normalized Built-up Area Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://www.omicsonline.org/scientific-reports/JGRS-SR136.pdf", + "short_name": "NBAI" + }, + "NBLI": { + "application_domain": "soil", + "bands": [ + "R", + "T" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-18", + "formula": "(R - T)/(R + T)", + "long_name": "Normalized Difference Bare Land Index", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://doi.org/10.3390/rs9030249", + "short_name": "NBLI" + }, + "NBLIOLI": { + "application_domain": "soil", + "bands": [ + "R", + "T1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2023-03-12", + "formula": "(R - T1)/(R + 
T1)", + "long_name": "Normalized Difference Bare Land Index for Landsat-OLI", + "platforms": [ + "Landsat-OLI" + ], + "reference": "https://doi.org/10.3390/rs9030249", + "short_name": "NBLIOLI" + }, + "NBR": { + "application_domain": "burn", + "bands": [ + "N", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(N - S2) / (N + S2)", + "long_name": "Normalized Burn Ratio", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.3133/ofr0211", + "short_name": "NBR" + }, + "NBR2": { + "application_domain": "burn", + "bands": [ + "S1", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-20", + "formula": "(S1 - S2) / (S1 + S2)", + "long_name": "Normalized Burn Ratio 2", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://www.usgs.gov/core-science-systems/nli/landsat/landsat-normalized-burn-ratio-2", + "short_name": "NBR2" + }, + "NBRSWIR": { + "application_domain": "burn", + "bands": [ + "S2", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-09-22", + "formula": "(S2 - S1 - 0.02)/(S2 + S1 + 0.1)", + "long_name": "Normalized Burn Ratio SWIR", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1080/22797254.2020.1738900", + "short_name": "NBRSWIR" + }, + "NBRT1": { + "application_domain": "burn", + "bands": [ + "N", + "S2", + "T" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(N - (S2 * T / 10000.0)) / (N + (S2 * T / 10000.0))", + "long_name": "Normalized Burn Ratio Thermal 1", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://doi.org/10.1080/01431160500239008", + "short_name": "NBRT1" + }, + "NBRT2": { + "application_domain": 
"burn", + "bands": [ + "N", + "T", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-19", + "formula": "((N / (T / 10000.0)) - S2) / ((N / (T / 10000.0)) + S2)", + "long_name": "Normalized Burn Ratio Thermal 2", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://doi.org/10.1080/01431160500239008", + "short_name": "NBRT2" + }, + "NBRT3": { + "application_domain": "burn", + "bands": [ + "N", + "T", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-19", + "formula": "((N - (T / 10000.0)) - S2) / ((N - (T / 10000.0)) + S2)", + "long_name": "Normalized Burn Ratio Thermal 3", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://doi.org/10.1080/01431160500239008", + "short_name": "NBRT3" + }, + "NBRplus": { + "application_domain": "burn", + "bands": [ + "S2", + "N2", + "G", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-09-22", + "formula": "(S2 - N2 - G - B)/(S2 + N2 + G + B)", + "long_name": "Normalized Burn Ratio Plus", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.3390/rs14071727", + "short_name": "NBRplus" + }, + "NBSIMS": { + "application_domain": "snow", + "bands": [ + "G", + "R", + "N", + "B", + "S2", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-09", + "formula": "0.36 * (G + R + N) - (((B + S2)/G) + S1)", + "long_name": "Non-Binary Snow Index for Multi-Component Surfaces", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.3390/rs13142777", + "short_name": "NBSIMS" + }, + "NBUI": { + "application_domain": "urban", + "bands": [ + "S1", + "N", + "T", + "R", + "L", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-18", + "formula": "((S1 - N)/(10.0 * (T + S1) ** 0.5)) - (((N - R) * (1.0 + L))/(N - R + L)) - 
(G - S1)/(G + S1)", + "long_name": "New Built-Up Index", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://hdl.handle.net/1959.11/29500", + "short_name": "NBUI" + }, + "ND705": { + "application_domain": "vegetation", + "bands": [ + "RE2", + "RE1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(RE2 - RE1)/(RE2 + RE1)", + "long_name": "Normalized Difference (705 and 750 nm)", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/S0034-4257(02)00010-X", + "short_name": "ND705" + }, + "NDBI": { + "application_domain": "urban", + "bands": [ + "S1", + "N" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "(S1 - N) / (S1 + N)", + "long_name": "Normalized Difference Built-Up Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "http://dx.doi.org/10.1080/01431160304987", + "short_name": "NDBI" + }, + "NDBaI": { + "application_domain": "soil", + "bands": [ + "S1", + "T" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-17", + "formula": "(S1 - T) / (S1 + T)", + "long_name": "Normalized Difference Bareness Index", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://doi.org/10.1109/IGARSS.2005.1526319", + "short_name": "NDBaI" + }, + "NDCI": { + "application_domain": "water", + "bands": [ + "RE1", + "R" + ], + "contributor": "https://github.com/kalab-oto", + "date_of_addition": "2022-10-10", + "formula": "(RE1 - R)/(RE1 + R)", + "long_name": "Normalized Difference Chlorophyll Index", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/j.rse.2011.10.016", + "short_name": "NDCI" + }, + "NDDI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(((N - 
R)/(N + R)) - ((G - N)/(G + N)))/(((N - R)/(N + R)) + ((G - N)/(G + N)))", + "long_name": "Normalized Difference Drought Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1029/2006GL029127", + "short_name": "NDDI" + }, + "NDGI": { + "application_domain": "vegetation", + "bands": [ + "lambdaN", + "lambdaR", + "lambdaG", + "G", + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-01-20", + "formula": "(((lambdaN - lambdaR)/(lambdaN - lambdaG)) * G + (1.0 - ((lambdaN - lambdaR)/(lambdaN - lambdaG))) * N - R)/(((lambdaN - lambdaR)/(lambdaN - lambdaG)) * G + (1.0 - ((lambdaN - lambdaR)/(lambdaN - lambdaG))) * N + R)", + "long_name": "Normalized Difference Greenness Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.rse.2019.03.028", + "short_name": "NDGI" + }, + "NDGlaI": { + "application_domain": "snow", + "bands": [ + "G", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(G - R)/(G + R)", + "long_name": "Normalized Difference Glacier Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1080/01431160802385459", + "short_name": "NDGlaI" + }, + "NDII": { + "application_domain": "vegetation", + "bands": [ + "N", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-01-20", + "formula": "(N - S1)/(N + S1)", + "long_name": "Normalized Difference Infrared Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://www.asprs.org/wp-content/uploads/pers/1983journal/jan/1983_jan_77-83.pdf", + "short_name": "NDII" + }, + "NDISIb": { + 
"application_domain": "urban", + "bands": [ + "T", + "B", + "N", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-18", + "formula": "(T - (B + N + S1) / 3.0)/(T + (B + N + S1) / 3.0)", + "long_name": "Normalized Difference Impervious Surface Index Blue", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://doi.org/10.14358/PERS.76.5.557", + "short_name": "NDISIb" + }, + "NDISIg": { + "application_domain": "urban", + "bands": [ + "T", + "G", + "N", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-18", + "formula": "(T - (G + N + S1) / 3.0)/(T + (G + N + S1) / 3.0)", + "long_name": "Normalized Difference Impervious Surface Index Green", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://doi.org/10.14358/PERS.76.5.557", + "short_name": "NDISIg" + }, + "NDISImndwi": { + "application_domain": "urban", + "bands": [ + "T", + "G", + "S1", + "N" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-18", + "formula": "(T - (((G - S1)/(G + S1)) + N + S1) / 3.0)/(T + (((G - S1)/(G + S1)) + N + S1) / 3.0)", + "long_name": "Normalized Difference Impervious Surface Index with MNDWI", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://doi.org/10.14358/PERS.76.5.557", + "short_name": "NDISImndwi" + }, + "NDISIndwi": { + "application_domain": "urban", + "bands": [ + "T", + "G", + "N", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-18", + "formula": "(T - (((G - N)/(G + N)) + N + S1) / 3.0)/(T + (((G - N)/(G + N)) + N + S1) / 3.0)", + "long_name": "Normalized Difference Impervious Surface Index with NDWI", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://doi.org/10.14358/PERS.76.5.557", + "short_name": "NDISIndwi" + }, + "NDISIr": { + "application_domain": "urban", + "bands": [ + "T", + "R", + "N", + "S1" + ], + 
"contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-18", + "formula": "(T - (R + N + S1) / 3.0)/(T + (R + N + S1) / 3.0)", + "long_name": "Normalized Difference Impervious Surface Index Red", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://doi.org/10.14358/PERS.76.5.557", + "short_name": "NDISIr" + }, + "NDMI": { + "application_domain": "vegetation", + "bands": [ + "N", + "S1" + ], + "contributor": "https://github.com/bpurinton", + "date_of_addition": "2021-12-01", + "formula": "(N - S1)/(N + S1)", + "long_name": "Normalized Difference Moisture Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/S0034-4257(01)00318-2", + "short_name": "NDMI" + }, + "NDPI": { + "application_domain": "vegetation", + "bands": [ + "N", + "alpha", + "R", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-01-20", + "formula": "(N - (alpha * R + (1.0 - alpha) * S1))/(N + (alpha * R + (1.0 - alpha) * S1))", + "long_name": "Normalized Difference Phenology Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/j.rse.2017.04.031", + "short_name": "NDPI" + }, + "NDPolI": { + "application_domain": "radar", + "bands": [ + "VV", + "VH" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-19", + "formula": "(VV - VH)/(VV + VH)", + "long_name": "Normalized Difference Polarization Index", + "platforms": [ + "Sentinel-1 (Dual Polarisation VV-VH)" + ], + "reference": "https://www.isprs.org/proceedings/XXXVII/congress/4_pdf/267.pdf", + "short_name": "NDPolI" + }, + "NDPonI": { + "application_domain": "water", + "bands": [ + "S1", + "G" + ], + "contributor": "https://github.com/CvenGeo", + "date_of_addition": "2022-10-03", + "formula": "(S1-G)/(S1+G)", + "long_name": "Normalized Difference Pond 
Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/j.rse.2006.07.012", + "short_name": "NDPonI" + }, + "NDREI": { + "application_domain": "vegetation", + "bands": [ + "N", + "RE1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-13", + "formula": "(N - RE1) / (N + RE1)", + "long_name": "Normalized Difference Red Edge Index", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/1011-1344(93)06963-4", + "short_name": "NDREI" + }, + "NDSI": { + "application_domain": "snow", + "bands": [ + "G", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(G - S1) / (G + S1)", + "long_name": "Normalized Difference Snow Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1109/IGARSS.1994.399618", + "short_name": "NDSI" + }, + "NDSII": { + "application_domain": "snow", + "bands": [ + "G", + "N" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(G - N)/(G + N)", + "long_name": "Normalized Difference Snow Ice Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1080/01431160802385459", + "short_name": "NDSII" + }, + "NDSIWV": { + "application_domain": "soil", + "bands": [ + "G", + "Y" + ], + "contributor": "https://github.com/remi-braun", + "date_of_addition": "2022-11-20", + "formula": "(G - Y)/(G + Y)", + "long_name": "WorldView Normalized Difference Soil Index", + "platforms": [], + "reference": "https://www.semanticscholar.org/paper/Using-WorldView-2-Vis-NIR-MSI-Imagery-to-Support-Wolf/5e5063ccc4ee76b56b721c866e871d47a77f9fb4", + "short_name": "NDSIWV" + }, + "NDSInw": { + "application_domain": "snow", + "bands": [ + "N", + 
"S1", + "beta" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(N - S1 - beta)/(N + S1)", + "long_name": "Normalized Difference Snow Index with no Water", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.3390/w12051339", + "short_name": "NDSInw" + }, + "NDSWIR": { + "application_domain": "burn", + "bands": [ + "N", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-09-22", + "formula": "(N - S1)/(N + S1)", + "long_name": "Normalized Difference SWIR", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1109/TGRS.2003.819190", + "short_name": "NDSWIR" + }, + "NDSaII": { + "application_domain": "snow", + "bands": [ + "R", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-18", + "formula": "(R - S1) / (R + S1)", + "long_name": "Normalized Difference Snow and Ice Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1080/01431160119766", + "short_name": "NDSaII" + }, + "NDSoI": { + "application_domain": "soil", + "bands": [ + "S2", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-18", + "formula": "(S2 - G)/(S2 + G)", + "long_name": "Normalized Difference Soil Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/j.jag.2015.02.010", + "short_name": "NDSoiI" + }, + "NDTI": { + "application_domain": "water", + "bands": [ + "R", + "G" + ], + "contributor": "https://github.com/CvenGeo", + "date_of_addition": "2022-10-03", + "formula": "(R-G)/(R+G)", + "long_name": "Normalized Difference Turbidity Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + 
"Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.rse.2006.07.012", + "short_name": "NDTI" + }, + "NDVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(N - R)/(N + R)", + "long_name": "Normalized Difference Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://ntrs.nasa.gov/citations/19740022614", + "short_name": "NDVI" + }, + "NDVI705": { + "application_domain": "vegetation", + "bands": [ + "RE2", + "RE1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-11-06", + "formula": "(RE2 - RE1) / (RE2 + RE1)", + "long_name": "Normalized Difference Vegetation Index (705 and 750 nm)", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/S0176-1617(11)81633-0", + "short_name": "NDVI705" + }, + "NDVIMNDWI": { + "application_domain": "water", + "bands": [ + "N", + "R", + "G", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-01-17", + "formula": "((N - R)/(N + R)) - ((G - S1)/(G + S1))", + "long_name": "NDVI-MNDWI Model", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1007/978-3-662-45737-5_51", + "short_name": "NDVIMNDWI" + }, + "NDVIT": { + "application_domain": "burn", + "bands": [ + "N", + "R", + "T" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(N - (R * T / 10000.0))/(N + (R * T / 10000.0))", + "long_name": "Normalized Difference Vegetation Index Thermal", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://doi.org/10.1080/01431160600954704", + "short_name": "NDVIT" + }, + "NDWI": { + "application_domain": "water", + "bands": [ + 
"G", + "N" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(G - N) / (G + N)", + "long_name": "Normalized Difference Water Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1080/01431169608948714", + "short_name": "NDWI" + }, + "NDWIns": { + "application_domain": "water", + "bands": [ + "G", + "alpha", + "N" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(G - alpha * N)/(G + N)", + "long_name": "Normalized Difference Water Index with no Snow Cover and Glaciers", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.3390/w12051339", + "short_name": "NDWIns" + }, + "NDYI": { + "application_domain": "vegetation", + "bands": [ + "G", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-18", + "formula": "(G - B) / (G + B)", + "long_name": "Normalized Difference Yellowness Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.rse.2016.06.016", + "short_name": "NDYI" + }, + "NGRDI": { + "application_domain": "vegetation", + "bands": [ + "G", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(G - R) / (G + R)", + "long_name": "Normalized Green Red Difference Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/0034-4257(79)90013-0", + "short_name": "NGRDI" + }, + "NHFD": { + "application_domain": "urban", + "bands": [ + "RE1", + "A" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-17", + "formula": "(RE1 - A) / 
(RE1 + A)", + "long_name": "Non-Homogeneous Feature Difference", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://www.semanticscholar.org/paper/Using-WorldView-2-Vis-NIR-MSI-Imagery-to-Support-Wolf/5e5063ccc4ee76b56b721c866e871d47a77f9fb4", + "short_name": "NHFD" + }, + "NIRv": { + "application_domain": "vegetation", + "bands": [ + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-11-16", + "formula": "((N - R) / (N + R)) * N", + "long_name": "Near-Infrared Reflectance of Vegetation", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1126/sciadv.1602244", + "short_name": "NIRv" + }, + "NIRvH2": { + "application_domain": "vegetation", + "bands": [ + "N", + "R", + "k", + "lambdaN", + "lambdaR" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-01-17", + "formula": "N - R - k * (lambdaN - lambdaR)", + "long_name": "Hyperspectral Near-Infrared Reflectance of Vegetation", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.rse.2021.112723", + "short_name": "NIRvH2" + }, + "NIRvP": { + "application_domain": "vegetation", + "bands": [ + "N", + "R", + "PAR" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-11-18", + "formula": "((N - R) / (N + R)) * N * PAR", + "long_name": "Near-Infrared Reflectance of Vegetation and Incoming PAR", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.rse.2021.112763", + "short_name": "NIRvP" + }, + "NLI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-11", + "formula": "((N ** 2) - R)/((N ** 2) + R)", + "long_name": 
"Non-Linear Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1080/02757259409532252", + "short_name": "NLI" + }, + "NMDI": { + "application_domain": "vegetation", + "bands": [ + "N", + "S1", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-11", + "formula": "(N - (S1 - S2))/(N + (S1 - S2))", + "long_name": "Normalized Multi-band Drought Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1029/2007GL031021", + "short_name": "NMDI" + }, + "NRFIg": { + "application_domain": "vegetation", + "bands": [ + "G", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-18", + "formula": "(G - S2) / (G + S2)", + "long_name": "Normalized Rapeseed Flowering Index Green", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.3390/rs13010105", + "short_name": "NRFIg" + }, + "NRFIr": { + "application_domain": "vegetation", + "bands": [ + "R", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-18", + "formula": "(R - S2) / (R + S2)", + "long_name": "Normalized Rapeseed Flowering Index Red", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.3390/rs13010105", + "short_name": "NRFIr" + }, + "NSDS": { + "application_domain": "soil", + "bands": [ + "S1", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-18", + "formula": "(S1 - S2)/(S1 + S2)", + "long_name": "Normalized Shortwave Infrared Difference Soil-Moisture", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.3390/land10030231", 
+ "short_name": "NSDS" + }, + "NSDSI1": { + "application_domain": "soil", + "bands": [ + "S1", + "S2" + ], + "contributor": "https://github.com/CvenGeo", + "date_of_addition": "2022-10-03", + "formula": "(S1-S2)/S1", + "long_name": "Normalized Shortwave-Infrared Difference Bare Soil Moisture Index 1", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/j.isprsjprs.2019.06.012", + "short_name": "NSDSI1" + }, + "NSDSI2": { + "application_domain": "soil", + "bands": [ + "S1", + "S2" + ], + "contributor": "https://github.com/CvenGeo", + "date_of_addition": "2022-10-03", + "formula": "(S1-S2)/S2", + "long_name": "Normalized Shortwave-Infrared Difference Bare Soil Moisture Index 2", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/j.isprsjprs.2019.06.012", + "short_name": "NSDSI2" + }, + "NSDSI3": { + "application_domain": "soil", + "bands": [ + "S1", + "S2" + ], + "contributor": "https://github.com/CvenGeo", + "date_of_addition": "2022-10-03", + "formula": "(S1-S2)/(S1+S2)", + "long_name": "Normalized Shortwave-Infrared Difference Bare Soil Moisture Index 3", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/j.isprsjprs.2019.06.012", + "short_name": "NSDSI3" + }, + "NSTv1": { + "application_domain": "burn", + "bands": [ + "N", + "S2", + "T" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-10-06", + "formula": "((N-S2)/(N+S2))*T", + "long_name": "NIR-SWIR-Temperature Version 1", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://doi.org/10.1016/j.rse.2011.06.010", + "short_name": "NSTv1" + }, + "NSTv2": { + "application_domain": "burn", + "bands": [ + "N", + "S2", + "T" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": 
"2022-10-06", + "formula": "(N-(S2+T))/(N+(S2+T))", + "long_name": "NIR-SWIR-Temperature Version 2", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://doi.org/10.1016/j.rse.2011.06.010", + "short_name": "NSTv2" + }, + "NWI": { + "application_domain": "water", + "bands": [ + "B", + "N", + "S1", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-01-17", + "formula": "(B - (N + S1 + S2))/(B + (N + S1 + S2))", + "long_name": "New Water Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.11873/j.issn.1004-0323.2009.2.167", + "short_name": "NWI" + }, + "NormG": { + "application_domain": "vegetation", + "bands": [ + "G", + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "G/(N + G + R)", + "long_name": "Normalized Green", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.2134/agronj2004.0314", + "short_name": "NormG" + }, + "NormNIR": { + "application_domain": "vegetation", + "bands": [ + "N", + "G", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "N/(N + G + R)", + "long_name": "Normalized NIR", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.2134/agronj2004.0314", + "short_name": "NormNIR" + }, + "NormR": { + "application_domain": "vegetation", + "bands": [ + "R", + "N", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "R/(N + G + R)", + "long_name": "Normalized Red", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": 
"https://doi.org/10.2134/agronj2004.0314", + "short_name": "NormR" + }, + "OCVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "G", + "R", + "cexp" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-13", + "formula": "(N / G) * (R / G) ** cexp", + "long_name": "Optimized Chlorophyll Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "http://dx.doi.org/10.1007/s11119-008-9075-z", + "short_name": "OCVI" + }, + "OSAVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-11", + "formula": "(N - R) / (N + R + 0.16)", + "long_name": "Optimized Soil-Adjusted Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/0034-4257(95)00186-7", + "short_name": "OSAVI" + }, + "PISI": { + "application_domain": "urban", + "bands": [ + "B", + "N" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-18", + "formula": "0.8192 * B - 0.5735 * N + 0.0750", + "long_name": "Perpendicular Impervious Surface Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.3390/rs10101521", + "short_name": "PISI" + }, + "PSRI": { + "application_domain": "vegetation", + "bands": [ + "R", + "B", + "RE2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(R - B)/RE2", + "long_name": "Plant Senescing Reflectance Index", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1034/j.1399-3054.1999.106119.x", + "short_name": "PSRI" + }, + "QpRVI": { + "application_domain": "radar", + "bands": [ + "HV", + "HH", + "VV" + ], + "contributor": 
"https://github.com/davemlz", + "date_of_addition": "2021-12-24", + "formula": "(8.0 * HV)/(HH + VV + 2.0 * HV)", + "long_name": "Quad-Polarized Radar Vegetation Index", + "platforms": [], + "reference": "https://doi.org/10.1109/IGARSS.2001.976856", + "short_name": "QpRVI" + }, + "RCC": { + "application_domain": "vegetation", + "bands": [ + "R", + "G", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-01-17", + "formula": "R / (R + G + B)", + "long_name": "Red Chromatic Coordinate", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/0034-4257(87)90088-5", + "short_name": "RCC" + }, + "RDVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "(N - R) / ((N + R) ** 0.5)", + "long_name": "Renormalized Difference Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/0034-4257(94)00114-3", + "short_name": "RDVI" + }, + "REDSI": { + "application_domain": "vegetation", + "bands": [ + "RE3", + "R", + "RE1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-11-06", + "formula": "((705.0 - 665.0) * (RE3 - R) - (783.0 - 665.0) * (RE1 - R)) / (2.0 * R)", + "long_name": "Red-Edge Disease Stress Index", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.3390/s18030868", + "short_name": "REDSI" + }, + "RENDVI": { + "application_domain": "vegetation", + "bands": [ + "RE2", + "RE1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-09", + "formula": "(RE2 - RE1)/(RE2 + RE1)", + "long_name": "Red Edge Normalized Difference Vegetation Index", + "platforms": [ + "Sentinel-2" + ], + "reference": 
"https://doi.org/10.1016/S0176-1617(11)81633-0", + "short_name": "RENDVI" + }, + "RFDI": { + "application_domain": "radar", + "bands": [ + "HH", + "HV" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-12-25", + "formula": "(HH - HV)/(HH + HV)", + "long_name": "Radar Forest Degradation Index", + "platforms": [ + "Sentinel-1 (Dual Polarisation HH-HV)" + ], + "reference": "https://doi.org/10.5194/bg-9-179-2012", + "short_name": "RFDI" + }, + "RGBVI": { + "application_domain": "vegetation", + "bands": [ + "G", + "B", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(G ** 2.0 - B * R)/(G ** 2.0 + B * R)", + "long_name": "Red Green Blue Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.jag.2015.02.012", + "short_name": "RGBVI" + }, + "RGRI": { + "application_domain": "vegetation", + "bands": [ + "R", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "R/G", + "long_name": "Red-Green Ratio Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.jag.2014.03.018", + "short_name": "RGRI" + }, + "RI": { + "application_domain": "vegetation", + "bands": [ + "R", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-09", + "formula": "(R - G)/(R + G)", + "long_name": "Redness Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://www.documentation.ird.fr/hor/fdi:34390", + "short_name": "RI" + }, + "RI4XS": { + "application_domain": "soil", + "bands": [ + "R", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-11-20", + "formula": 
"(R**2.0)/(G**4.0)", + "long_name": "SPOT HRV XS-based Redness Index 4", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/S0034-4257(98)00030-3", + "short_name": "RI4XS" + }, + "RVI": { + "application_domain": "vegetation", + "bands": [ + "RE2", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "RE2 / R", + "long_name": "Ratio Vegetation Index", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.2134/agronj1968.00021962006000060016x", + "short_name": "RVI" + }, + "S2REP": { + "application_domain": "vegetation", + "bands": [ + "RE3", + "R", + "RE1", + "RE2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-17", + "formula": "705.0 + 35.0 * ((((RE3 + R) / 2.0) - RE1) / (RE2 - RE1))", + "long_name": "Sentinel-2 Red-Edge Position", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/j.isprsjprs.2013.04.007", + "short_name": "S2REP" + }, + "S2WI": { + "application_domain": "water", + "bands": [ + "RE1", + "S2" + ], + "contributor": "https://github.com/MATRIX4284", + "date_of_addition": "2022-03-06", + "formula": "(RE1 - S2)/(RE1 + S2)", + "long_name": "Sentinel-2 Water Index", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.3390/w13121647", + "short_name": "S2WI" + }, + "S3": { + "application_domain": "snow", + "bands": [ + "N", + "R", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-18", + "formula": "(N * (R - S1)) / ((N + R) * (N + S1))", + "long_name": "S3 Snow Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.3178/jjshwr.12.28", + "short_name": "S3" + }, + "SARVI": { + "application_domain": "vegetation", + "bands": [ + "L", + "N", + "R", + "B" + ], + "contributor": 
"https://github.com/davemlz", + "date_of_addition": "2021-05-11", + "formula": "(1 + L)*(N - (R - (R - B))) / (N + (R - (R - B)) + L)", + "long_name": "Soil Adjusted and Atmospherically Resistant Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1109/36.134076", + "short_name": "SARVI" + }, + "SAVI": { + "application_domain": "vegetation", + "bands": [ + "L", + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(1.0 + L) * (N - R) / (N + R + L)", + "long_name": "Soil-Adjusted Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/0034-4257(88)90106-X", + "short_name": "SAVI" + }, + "SAVI2": { + "application_domain": "vegetation", + "bands": [ + "N", + "R", + "slb", + "sla" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "N / (R + (slb / sla))", + "long_name": "Soil-Adjusted Vegetation Index 2", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1080/01431169008955053", + "short_name": "SAVI2" + }, + "SAVIT": { + "application_domain": "burn", + "bands": [ + "L", + "N", + "R", + "T" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(1.0 + L) * (N - (R * T / 10000.0)) / (N + (R * T / 10000.0) + L)", + "long_name": "Soil-Adjusted Vegetation Index Thermal", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://doi.org/10.1080/01431160600954704", + "short_name": "SAVIT" + }, + "SEVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R", + "fdelta" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": 
"2022-09-22", + "formula": "(N/R) + fdelta * (1.0/R)", + "long_name": "Shadow-Eliminated Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1080/17538947.2018.1495770", + "short_name": "SEVI" + }, + "SI": { + "application_domain": "vegetation", + "bands": [ + "B", + "G", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "((1.0 - B) * (1.0 - G) * (1.0 - R)) ** (1/3)", + "long_name": "Shadow Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.465.8749&rep=rep1&type=pdf", + "short_name": "SI" + }, + "SIPI": { + "application_domain": "vegetation", + "bands": [ + "N", + "A", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-17", + "formula": "(N - A) / (N - R)", + "long_name": "Structure Insensitive Pigment Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI" + ], + "reference": "https://eurekamag.com/research/009/395/009395053.php", + "short_name": "SIPI" + }, + "SR": { + "application_domain": "vegetation", + "bands": [ + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "N/R", + "long_name": "Simple Ratio", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.2307/1936256", + "short_name": "SR" + }, + "SR2": { + "application_domain": "vegetation", + "bands": [ + "N", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-09", + "formula": "N/G", + "long_name": "Simple Ratio (800 and 550 nm)", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + 
"reference": "https://doi.org/10.1080/01431169308904370", + "short_name": "SR2" + }, + "SR3": { + "application_domain": "vegetation", + "bands": [ + "N2", + "G", + "RE1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-09", + "formula": "N2/(G * RE1)", + "long_name": "Simple Ratio (860, 550 and 708 nm)", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/S0034-4257(98)00046-7", + "short_name": "SR3" + }, + "SR555": { + "application_domain": "vegetation", + "bands": [ + "RE2", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-11-06", + "formula": "RE2 / G", + "long_name": "Simple Ratio (555 and 750 nm)", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/S0176-1617(11)81633-0", + "short_name": "SR555" + }, + "SR705": { + "application_domain": "vegetation", + "bands": [ + "RE2", + "RE1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-11-06", + "formula": "RE2 / RE1", + "long_name": "Simple Ratio (705 and 750 nm)", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/S0176-1617(11)81633-0", + "short_name": "SR705" + }, + "SWI": { + "application_domain": "snow", + "bands": [ + "G", + "N", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-18", + "formula": "(G * (N - S1)) / ((G + N) * (N + S1))", + "long_name": "Snow Water Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.3390/rs11232774", + "short_name": "SWI" + }, + "SWM": { + "application_domain": "water", + "bands": [ + "B", + "G", + "N", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-20", + "formula": "(B + G)/(N + S1)", + "long_name": "Sentinel Water Mask", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + 
"reference": "https://eoscience.esa.int/landtraining2017/files/posters/MILCZAREK.pdf", + "short_name": "SWM" + }, + "SeLI": { + "application_domain": "vegetation", + "bands": [ + "N2", + "RE1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-08", + "formula": "(N2 - RE1) / (N2 + RE1)", + "long_name": "Sentinel-2 LAI Green Index", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.3390/s19040904", + "short_name": "SeLI" + }, + "TCARI": { + "application_domain": "vegetation", + "bands": [ + "RE1", + "R", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-13", + "formula": "3 * ((RE1 - R) - 0.2 * (RE1 - G) * (RE1 / R))", + "long_name": "Transformed Chlorophyll Absorption in Reflectance Index", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/S0034-4257(02)00018-4", + "short_name": "TCARI" + }, + "TCARIOSAVI": { + "application_domain": "vegetation", + "bands": [ + "RE1", + "R", + "G", + "N" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-11-06", + "formula": "(3 * ((RE1 - R) - 0.2 * (RE1 - G) * (RE1 / R))) / (1.16 * (N - R) / (N + R + 0.16))", + "long_name": "TCARI/OSAVI Ratio", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/S0034-4257(02)00018-4", + "short_name": "TCARIOSAVI" + }, + "TCARIOSAVI705": { + "application_domain": "vegetation", + "bands": [ + "RE2", + "RE1", + "G" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-11-06", + "formula": "(3 * ((RE2 - RE1) - 0.2 * (RE2 - G) * (RE2 / RE1))) / (1.16 * (RE2 - RE1) / (RE2 + RE1 + 0.16))", + "long_name": "TCARI/OSAVI Ratio (705 and 750 nm)", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/j.agrformet.2008.03.005", + "short_name": "TCARIOSAVI705" + }, + "TCI": { + "application_domain": "vegetation", + "bands": [ + "RE1", + "G", + "R" + ], + "contributor": 
"https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "1.2 * (RE1 - G) - 1.5 * (R - G) * (RE1 / R) ** 0.5", + "long_name": "Triangular Chlorophyll Index", + "platforms": [ + "Sentinel-2" + ], + "reference": "http://dx.doi.org/10.1109/TGRS.2007.904836", + "short_name": "TCI" + }, + "TDVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-09", + "formula": "1.5 * ((N - R)/((N ** 2.0 + R + 0.5) ** 0.5))", + "long_name": "Transformed Difference Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1109/IGARSS.2002.1026867", + "short_name": "TDVI" + }, + "TGI": { + "application_domain": "vegetation", + "bands": [ + "R", + "G", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "- 0.5 * (190 * (R - G) - 120 * (R - B))", + "long_name": "Triangular Greenness Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "http://dx.doi.org/10.1016/j.jag.2012.07.020", + "short_name": "TGI" + }, + "TRRVI": { + "application_domain": "vegetation", + "bands": [ + "RE2", + "R", + "N" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-18", + "formula": "((RE2 - R) / (RE2 + R)) / (((N - R) / (N + R)) + 1.0)", + "long_name": "Transformed Red Range Vegetation Index", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.3390/rs12152359", + "short_name": "TRRVI" + }, + "TSAVI": { + "application_domain": "vegetation", + "bands": [ + "sla", + "N", + "R", + "slb" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "sla * (N - sla * R - slb) / (sla * N + R - sla * slb)", + "long_name": "Transformed Soil-Adjusted 
Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1109/IGARSS.1989.576128", + "short_name": "TSAVI" + }, + "TTVI": { + "application_domain": "vegetation", + "bands": [ + "RE3", + "RE2", + "N2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-18", + "formula": "0.5 * ((865.0 - 740.0) * (RE3 - RE2) - (N2 - RE2) * (783.0 - 740))", + "long_name": "Transformed Triangular Vegetation Index", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.3390/rs12010016", + "short_name": "TTVI" + }, + "TVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(((N - R)/(N + R)) + 0.5) ** 0.5", + "long_name": "Transformed Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://ntrs.nasa.gov/citations/19740022614", + "short_name": "TVI" + }, + "TWI": { + "application_domain": "water", + "bands": [ + "RE1", + "RE2", + "G", + "S2", + "B", + "N" + ], + "contributor": "https://github.com/remi-braun", + "date_of_addition": "2023-02-10", + "formula": "(2.84 * (RE1 - RE2) / (G + S2)) + ((1.25 * (G - B) - (N - B)) / (N + 1.25 * G - 0.25 * B))", + "long_name": "Triangle Water Index", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.3390/rs14215289", + "short_name": "TWI" + }, + "TriVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "G", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "0.5 * (120 * (N - G) - 200 * (R - G))", + "long_name": "Triangular Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": 
"http://dx.doi.org/10.1016/S0034-4257(00)00197-8", + "short_name": "TriVI" + }, + "UI": { + "application_domain": "urban", + "bands": [ + "S2", + "N" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-02-07", + "formula": "(S2 - N)/(S2 + N)", + "long_name": "Urban Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://www.isprs.org/proceedings/XXXI/congress/part7/321_XXXI-part7.pdf", + "short_name": "UI" + }, + "VARI": { + "application_domain": "vegetation", + "bands": [ + "G", + "R", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(G - R) / (G + R - B)", + "long_name": "Visible Atmospherically Resistant Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/S0034-4257(01)00289-9", + "short_name": "VARI" + }, + "VARI700": { + "application_domain": "vegetation", + "bands": [ + "RE1", + "R", + "B" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-20", + "formula": "(RE1 - 1.7 * R + 0.7 * B) / (RE1 + 1.3 * R - 1.3 * B)", + "long_name": "Visible Atmospherically Resistant Index (700 nm)", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/S0034-4257(01)00289-9", + "short_name": "VARI700" + }, + "VDDPI": { + "application_domain": "radar", + "bands": [ + "VV", + "VH" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-19", + "formula": "(VV + VH)/VV", + "long_name": "Vertical Dual De-Polarization Index", + "platforms": [ + "Sentinel-1 (Dual Polarisation VV-VH)" + ], + "reference": "https://doi.org/10.1016/j.rse.2018.09.003", + "short_name": "VDDPI" + }, + "VHVVD": { + "application_domain": "radar", + "bands": [ + "VH", + "VV" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-19", 
+ "formula": "VH - VV", + "long_name": "VH-VV Difference", + "platforms": [ + "Sentinel-1 (Dual Polarisation VV-VH)" + ], + "reference": "https://doi.org/10.3390/app9040655", + "short_name": "VHVVD" + }, + "VHVVP": { + "application_domain": "radar", + "bands": [ + "VH", + "VV" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-19", + "formula": "VH * VV", + "long_name": "VH-VV Product", + "platforms": [ + "Sentinel-1 (Dual Polarisation VV-VH)" + ], + "reference": "https://doi.org/10.1109/IGARSS47720.2021.9554099", + "short_name": "VHVVP" + }, + "VHVVR": { + "application_domain": "radar", + "bands": [ + "VH", + "VV" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-19", + "formula": "VH/VV", + "long_name": "VH-VV Ratio", + "platforms": [ + "Sentinel-1 (Dual Polarisation VV-VH)" + ], + "reference": "https://doi.org/10.1109/IGARSS47720.2021.9554099", + "short_name": "VHVVR" + }, + "VI6T": { + "application_domain": "burn", + "bands": [ + "N", + "T" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-19", + "formula": "(N - T/10000.0)/(N + T/10000.0)", + "long_name": "VI6T Index", + "platforms": [ + "Landsat-TM", + "Landsat-ETM+" + ], + "reference": "https://doi.org/10.1080/01431160500239008", + "short_name": "VI6T" + }, + "VI700": { + "application_domain": "vegetation", + "bands": [ + "RE1", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-20", + "formula": "(RE1 - R) / (RE1 + R)", + "long_name": "Vegetation Index (700 nm)", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/S0034-4257(01)00289-9", + "short_name": "VI700" + }, + "VIBI": { + "application_domain": "urban", + "bands": [ + "N", + "R", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-09-22", + "formula": "((N-R)/(N+R))/(((N-R)/(N+R)) + ((S1-N)/(S1+N)))", + "long_name": "Vegetation Index Built-up Index", + 
"platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "http://dx.doi.org/10.1080/01431161.2012.687842", + "short_name": "VIBI" + }, + "VIG": { + "application_domain": "vegetation", + "bands": [ + "G", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-20", + "formula": "(G - R) / (G + R)", + "long_name": "Vegetation Index Green", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/S0034-4257(01)00289-9", + "short_name": "VIG" + }, + "VVVHD": { + "application_domain": "radar", + "bands": [ + "VV", + "VH" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-19", + "formula": "VV - VH", + "long_name": "VV-VH Difference", + "platforms": [ + "Sentinel-1 (Dual Polarisation VV-VH)" + ], + "reference": "https://doi.org/10.1109/IGARSS47720.2021.9554099", + "short_name": "VVVHD" + }, + "VVVHR": { + "application_domain": "radar", + "bands": [ + "VV", + "VH" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-19", + "formula": "VV/VH", + "long_name": "VV-VH Ratio", + "platforms": [ + "Sentinel-1 (Dual Polarisation VV-VH)" + ], + "reference": "https://doi.org/10.3390/app9040655", + "short_name": "VVVHR" + }, + "VVVHS": { + "application_domain": "radar", + "bands": [ + "VV", + "VH" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-19", + "formula": "VV + VH", + "long_name": "VV-VH Sum", + "platforms": [ + "Sentinel-1 (Dual Polarisation VV-VH)" + ], + "reference": "https://doi.org/10.1109/IGARSS47720.2021.9554099", + "short_name": "VVVHS" + }, + "VgNIRBI": { + "application_domain": "urban", + "bands": [ + "G", + "N" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-02-09", + "formula": "(G - N)/(G + N)", + "long_name": "Visible Green-Based Built-Up Index", + 
"platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.ecolind.2015.03.037", + "short_name": "VgNIRBI" + }, + "VrNIRBI": { + "application_domain": "urban", + "bands": [ + "R", + "N" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-02-09", + "formula": "(R - N)/(R + N)", + "long_name": "Visible Red-Based Built-Up Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/j.ecolind.2015.03.037", + "short_name": "VrNIRBI" + }, + "WDRVI": { + "application_domain": "vegetation", + "bands": [ + "alpha", + "N", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "(alpha * N - R) / (alpha * N + R)", + "long_name": "Wide Dynamic Range Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1078/0176-1617-01176", + "short_name": "WDRVI" + }, + "WDVI": { + "application_domain": "vegetation", + "bands": [ + "N", + "sla", + "R" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-14", + "formula": "N - sla * R", + "long_name": "Weighted Difference Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1016/0034-4257(89)90076-X", + "short_name": "WDVI" + }, + "WI1": { + "application_domain": "water", + "bands": [ + "G", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-18", + "formula": "(G - S2) / (G + S2)", + "long_name": "Water Index 1", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.3390/rs11182186", + 
"short_name": "WI1" + }, + "WI2": { + "application_domain": "water", + "bands": [ + "B", + "S2" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-09-18", + "formula": "(B - S2) / (B + S2)", + "long_name": "Water Index 2", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.3390/rs11182186", + "short_name": "WI2" + }, + "WI2015": { + "application_domain": "water", + "bands": [ + "G", + "R", + "N", + "S1", + "S2" + ], + "contributor": "https://github.com/remi-braun", + "date_of_addition": "2022-10-26", + "formula": "1.7204 + 171 * G + 3 * R - 70 * N - 45 * S1 - 71 * S2", + "long_name": "Water Index 2015", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1016/j.rse.2015.12.055", + "short_name": "WI2015" + }, + "WRI": { + "application_domain": "water", + "bands": [ + "G", + "R", + "N", + "S1" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-01-17", + "formula": "(G + R)/(N + S1)", + "long_name": "Water Ratio Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS" + ], + "reference": "https://doi.org/10.1109/GEOINFORMATICS.2010.5567762", + "short_name": "WRI" + }, + "kEVI": { + "application_domain": "kernel", + "bands": [ + "g", + "kNN", + "kNR", + "C1", + "C2", + "kNB", + "kNL" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-10", + "formula": "g * (kNN - kNR) / (kNN + C1 * kNR - C2 * kNB + kNL)", + "long_name": "Kernel Enhanced Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1126/sciadv.abc7447", + "short_name": "kEVI" + }, + "kIPVI": { + "application_domain": "kernel", + "bands": [ + "kNN", + "kNR" + ], + "contributor": 
"https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "kNN/(kNN + kNR)", + "long_name": "Kernel Infrared Percentage Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1126/sciadv.abc7447", + "short_name": "kIPVI" + }, + "kNDVI": { + "application_domain": "kernel", + "bands": [ + "kNN", + "kNR" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "(kNN - kNR)/(kNN + kNR)", + "long_name": "Kernel Normalized Difference Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1126/sciadv.abc7447", + "short_name": "kNDVI" + }, + "kRVI": { + "application_domain": "kernel", + "bands": [ + "kNN", + "kNR" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-04-07", + "formula": "kNN / kNR", + "long_name": "Kernel Ratio Vegetation Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1126/sciadv.abc7447", + "short_name": "kRVI" + }, + "kVARI": { + "application_domain": "kernel", + "bands": [ + "kGG", + "kGR", + "kGB" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2021-05-10", + "formula": "(kGG - kGR) / (kGG + kGR - kGB)", + "long_name": "Kernel Visible Atmospherically Resistant Index", + "platforms": [ + "Sentinel-2", + "Landsat-OLI", + "Landsat-TM", + "Landsat-ETM+", + "MODIS", + "Planet-Fusion" + ], + "reference": "https://doi.org/10.1126/sciadv.abc7447", + "short_name": "kVARI" + }, + "mND705": { + "application_domain": "vegetation", + "bands": [ + "RE2", + "RE1", + "A" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(RE2 - RE1)/(RE2 + RE1 - A)", + 
"long_name": "Modified Normalized Difference (705, 750 and 445 nm)", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/S0034-4257(02)00010-X", + "short_name": "mND705" + }, + "mSR705": { + "application_domain": "vegetation", + "bands": [ + "RE2", + "A" + ], + "contributor": "https://github.com/davemlz", + "date_of_addition": "2022-04-08", + "formula": "(RE2 - A)/(RE2 + A)", + "long_name": "Modified Simple Ratio (705 and 445 nm)", + "platforms": [ + "Sentinel-2" + ], + "reference": "https://doi.org/10.1016/S0034-4257(02)00010-X", + "short_name": "mSR705" + } + } +} diff --git a/lib/openeo/extra/spectral_indices/resources/extra-indices-dict.json b/lib/openeo/extra/spectral_indices/resources/extra-indices-dict.json new file mode 100644 index 000000000..f8b0e55f7 --- /dev/null +++ b/lib/openeo/extra/spectral_indices/resources/extra-indices-dict.json @@ -0,0 +1,98 @@ +{ + "SpectralIndices": { + "ANIR": { + "bands": + [ + "R", + "N", + "S1" + ], + "contributor": "vito", + "date_of_addition": "2021-10-27", + "formula": "exec('import numpy as np') or exec('from openeo.processes import clip') or np.arccos(clip((( np.sqrt( (0.8328 - 0.6646)**2 + (N - R)**2 )**2 + np.sqrt( (1.610 - 0.8328)**2 + (S1 - N)**2 )**2 - np.sqrt( (1.610 - 0.6646)**2 + (S1 - R)**2 )**2 ) / (2 * np.sqrt( (0.8328 - 0.6646)**2 + (N - R)**2 ) * np.sqrt( (1.610 - 0.8328)**2 + (S1 - N)**2 ))), -1,1)) * (1. 
/ np.pi)", + "long_name": "Angle at Near InfraRed", + "reference": "", + "short_name": "ANIR", + "type": "vegetation" + }, + "NDRE1": { + "bands": [ + "N", + "RE1" + ], + "contributor": "vito", + "date_of_addition": "2021-10-27", + "formula": "(N - RE1) / (N + RE1)", + "long_name": "Normalized Difference Red Edge 1", + "reference": "", + "short_name": "NDRE1", + "type": "vegetation" + }, + "NDRE2": { + "bands": [ + "N", + "RE2" + ], + "contributor": "vito", + "date_of_addition": "2021-10-27", + "formula": "(N - RE2) / (N + RE2)", + "long_name": "Normalized Difference Red Edge 2", + "reference": "", + "short_name": "NDRE2", + "type": "vegetation" + }, + "NDRE5": { + "bands": [ + "RE1", + "RE3" + ], + "contributor": "vito", + "date_of_addition": "2021-10-27", + "formula": "(RE3 - RE1) / (RE3 + RE1)", + "long_name": "Normalized Difference Red Edge 5", + "reference": "", + "short_name": "NDRE5", + "type": "vegetation" + }, + "BI2": { + "bands": [ + "G", + "R", + "N" + ], + "contributor": "vito", + "date_of_addition": "2022-01-27", + "formula": "((R**2+N**2+G**2)**0.5)/3", + "long_name": "Brightness index 2", + "reference": "https://digifed.org/", + "short_name": "BI2", + "type": "soil" + }, + "BI_B08": { + "bands": [ + "R", + "N" + ], + "contributor": "vito", + "date_of_addition": "2022-01-27", + "formula": "(R**2+N**2)**0.5", + "long_name": "Brightness index B08", + "reference": "https://digifed.org/", + "short_name": "BI_B08", + "type": "soil" + }, + "LSWI_B12": { + "bands": [ + "N", + "S2" + ], + "contributor": "vito", + "date_of_addition": "2022-01-27", + "formula": "(N-S2)/(N+S2)", + "long_name": "Sentinel-2 land surface water index", + "reference": "https://digifed.org/", + "short_name": "LSWI_B12", + "type": "water" + } + } +} diff --git a/lib/openeo/extra/spectral_indices/spectral_indices.py b/lib/openeo/extra/spectral_indices/spectral_indices.py new file mode 100644 index 000000000..8ac3c0b93 --- /dev/null +++ 
b/lib/openeo/extra/spectral_indices/spectral_indices.py @@ -0,0 +1,475 @@ +import functools +import json +import re +from typing import Dict, List, Optional, Set + +from openeo import BaseOpenEoException +from openeo.processes import ProcessBuilder, array_create, array_modify +from openeo.rest.datacube import DataCube + +try: + import importlib_resources +except ImportError: + import importlib.resources as importlib_resources + + +@functools.lru_cache(maxsize=1) +def load_indices() -> Dict[str, dict]: + """Load set of supported spectral indices.""" + # TODO: encapsulate all this json loading in a single Awesome Spectral Indices registry class? + specs = {} + + for path in [ + "resources/awesome-spectral-indices/spectral-indices-dict.json", + # TODO #506 Deprecate extra-indices-dict.json as a whole + # and provide an alternative mechanism to work with custom indices + "resources/extra-indices-dict.json", + ]: + with importlib_resources.files("openeo.extra.spectral_indices") / path as resource_path: + data = json.loads(resource_path.read_text(encoding="utf8")) + overwrites = set(specs.keys()).intersection(data["SpectralIndices"].keys()) + if overwrites: + raise RuntimeError(f"Duplicate spectral indices: {overwrites} from {path}") + specs.update(data["SpectralIndices"]) + + return specs + + +@functools.lru_cache(maxsize=1) +def load_constants() -> Dict[str, float]: + """Load constants defined by Awesome Spectral Indices.""" + # TODO: encapsulate all this json loading in a single Awesome Spectral Indices registry class? 
+ with importlib_resources.files( + "openeo.extra.spectral_indices" + ) / "resources/awesome-spectral-indices/constants.json" as resource_path: + data = json.loads(resource_path.read_text(encoding="utf8")) + + return {k: v["default"] for k, v in data.items() if isinstance(v["default"], (int, float))} + + +@functools.lru_cache(maxsize=1) +def _load_bands() -> Dict[str, dict]: + """Load band name mapping defined by Awesome Spectral Indices.""" + # TODO: encapsulate all this json loading in a single Awesome Spectral Indices registry class? + with importlib_resources.files( + "openeo.extra.spectral_indices" + ) / "resources/awesome-spectral-indices/bands.json" as resource_path: + data = json.loads(resource_path.read_text(encoding="utf8")) + return data + + +class BandMappingException(BaseOpenEoException): + """Failure to determine band-variable mapping.""" + + +class _BandMapping: + """ + Helper class to extract mappings between band names and variable names used in Awesome Spectral Indices formulas. 
+ """ + + _EXTRA = { + "sentinel1": {"HH": "HH", "HV": "HV", "VH": "VH", "VV": "VV"}, + } + + def __init__(self): + # Load bands.json from Awesome Spectral Indices + self._band_data = _load_bands() + + @staticmethod + def _normalize_platform(platform: str) -> str: + platform = platform.lower().replace("-", "").replace(" ", "") + if platform in {"sentinel2a", "sentinel2b"}: + platform = "sentinel2" + return platform + + @staticmethod + def _normalize_band_name(band_name: str) -> str: + band_name = band_name.upper() + # Normalize band names like "B01" to "B1" + band_name = re.sub(r"^B0+(\d+)$", r"B\1", band_name) + return band_name + + @functools.lru_cache(maxsize=1) + def get_platforms(self) -> Set[str]: + """Get list of supported (normalized) satellite platforms.""" + platforms = {p for var_data in self._band_data.values() for p in var_data.get("platforms", {}).keys()} + platforms.update(self._EXTRA.keys()) + platforms.update({self._normalize_platform(p) for p in platforms}) + return platforms + + def guess_platform(self, name: str) -> str: + """Guess platform from given collection id or name.""" + # First check original id, then retry with removed separators as last resort. + for haystack in [name.lower(), re.sub("[_ -]", "", name.lower())]: + for platform in sorted(self.get_platforms(), key=len, reverse=True): + if platform in haystack: + return platform + raise BandMappingException(f"Unable to guess satellite platform from id {name!r}.") + + def variable_to_band_name_map(self, platform: str) -> Dict[str, str]: + """ + Build mapping from Awesome Spectral Indices variable names to (normalized) band names for given satellite platform. 
+ """ + platform_normalized = self._normalize_platform(platform) + if platform_normalized in self._EXTRA: + return self._EXTRA[platform_normalized] + + var_to_band = { + var: pf_data["band"] + for var, var_data in self._band_data.items() + for pf, pf_data in var_data.get("platforms", {}).items() + if self._normalize_platform(pf) == platform_normalized + } + if not var_to_band: + raise BandMappingException(f"Empty band mapping derived for satellite platform {platform!r}") + return var_to_band + + def actual_band_name_to_variable_map(self, platform: str, band_names: List[str]) -> Dict[str, str]: + """Build mapping from actual band names (as given) to Awesome Spectral Indices variable names.""" + var_to_band = self.variable_to_band_name_map(platform=platform) + band_to_var = { + band_name: var + for var, normalized_band_name in var_to_band.items() + for band_name in band_names + if self._normalize_band_name(band_name) == normalized_band_name + } + return band_to_var + + +def list_indices() -> List[str]: + """List names of supported spectral indices""" + specs = load_indices() + return list(specs.keys()) + + +def _check_params(item, params): + range_vals = ["input_range", "output_range"] + if set(params) != set(range_vals): + raise ValueError( + f"You have set the parameters {params} on {item}, while the following are required {range_vals}" + ) + for rng in range_vals: + if params[rng] is None: + continue + if len(params[rng]) != 2: + raise ValueError( + f"The list of provided values {params[rng]} for parameter {rng} for {item} is not of length 2" + ) + # TODO: allow float too? 
+ if not all(isinstance(val, int) for val in params[rng]): + raise ValueError("The ranges you supplied are not all of type int") + if (params["input_range"] is None) != (params["output_range"] is None): + raise ValueError(f"The index_range and output_range of {item} should either be both supplied, or both None") + + +def _check_validity_index_dict(index_dict: dict, index_specs: dict): + # TODO: this `index_dict` API needs some more rethinking: + # - the dictionary has no explicit order of indices, which can be important for end user + # - allow "collection" to be missing (e.g. if no rescaling is desired, or input data is not kept)? + # - option to define default output range, instead of having it to specify it for each index? + # - keep "rescaling" feature separate/orthogonal from "spectral indices" feature. It could be useful as + # a more generic machine learning data preparation feature + input_vals = ["collection", "indices"] + if set(index_dict.keys()) != set(input_vals): + raise ValueError( + f"The first level of the dictionary should contain the keys 'collection' and 'indices', but they contain {index_dict.keys()}" + ) + _check_params("collection", index_dict["collection"]) + for index, params in index_dict["indices"].items(): + if index not in index_specs.keys(): + raise NotImplementedError("Index " + index + " is not supported.") + _check_params(index, params) + + +def _callback( + x: ProcessBuilder, + index_dict: dict, + index_specs: dict, + append: bool, + band_names: List[str], + band_to_var: Dict[str, str], +) -> ProcessBuilder: + index_values = [] + x_res = x + + # TODO: use `label` parameter of `array_element` to avoid index based band references + variables = {band_to_var[bn]: x.array_element(i) for i, bn in enumerate(band_names) if bn in band_to_var} + eval_globals = { + **load_constants(), + **variables, + } + # TODO: user might want to control order of indices, which is tricky through a dictionary. 
+ for index, params in index_dict["indices"].items(): + index_result = eval(index_specs[index]["formula"], eval_globals) + if params["input_range"] is not None: + index_result = index_result.linear_scale_range(*params["input_range"], *params["output_range"]) + index_values.append(index_result) + if index_dict["collection"]["input_range"] is not None: + x_res = x_res.linear_scale_range( + *index_dict["collection"]["input_range"], *index_dict["collection"]["output_range"] + ) + if append: + return array_modify(data=x_res, values=index_values, index=len(band_names)) + else: + return array_create(data=index_values) + + +def compute_and_rescale_indices( + datacube: DataCube, + index_dict: dict, + *, + append: bool = False, + variable_map: Optional[Dict[str, str]] = None, + platform: Optional[str] = None, +) -> DataCube: + """ + Computes a list of indices from a data cube + + :param datacube: input data cube + :param index_dict: a dictionary that contains the input- and output range of the collection on which you calculate the indices + as well as the indices that you want to calculate with their responding input- and output ranges + It follows the following format:: + + { + "collection": { + "input_range": [0,8000], + "output_range": [0,250] + }, + "indices": { + "NDVI": { + "input_range": [-1,1], + "output_range": [0,250] + }, + } + } + + If you don't want to rescale your data, you can fill the input-, index- and output-range with ``None``. + + See `list_indices()` for supported indices. + + :param append: append the indices as bands to the given data cube + instead of creating a new cube with only the calculated indices + :param variable_map: (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names. + To be specified if the given data cube has non-standard band names, + or the satellite platform can not be recognized from the data cube metadata. + See :ref:`spectral_indices_manual_band_mapping` for more information. 
+ :param platform: optionally specify the satellite platform (to determine band name mapping) + if the given data cube has no or an unhandled collection id in its metadata. + See :ref:`spectral_indices_manual_band_mapping` for more information. + + :return: the datacube with the indices attached as bands + + .. warning:: this "rescaled" index helper uses an experimental API (e.g. `index_dict` argument) that is subject to change. + + .. versionadded:: 0.26.0 + Added `variable_map` and `platform` arguments. + + """ + index_specs = load_indices() + + _check_validity_index_dict(index_dict, index_specs) + + if variable_map is None: + # Automatic band mapping + band_mapping = _BandMapping() + if platform is None: + if datacube.metadata and datacube.metadata.get("id"): + platform = band_mapping.guess_platform(name=datacube.metadata.get("id")) + else: + raise BandMappingException("Unable to determine satellite platform from data cube metadata") + band_to_var = band_mapping.actual_band_name_to_variable_map( + platform=platform, band_names=datacube.metadata.band_names + ) + else: + band_to_var = {b: v for v, b in variable_map.items()} + + res = datacube.apply_dimension( + dimension="bands", + process=lambda x: _callback( + x, + index_dict=index_dict, + index_specs=index_specs, + append=append, + band_names=datacube.metadata.band_names, + band_to_var=band_to_var, + ), + ) + if append: + return res.rename_labels("bands", target=datacube.metadata.band_names + list(index_dict["indices"].keys())) + else: + return res.rename_labels("bands", target=list(index_dict["indices"].keys())) + + +def append_and_rescale_indices( + datacube: DataCube, + index_dict: dict, + *, + variable_map: Optional[Dict[str, str]] = None, + platform: Optional[str] = None, +) -> DataCube: + """ + Computes a list of indices from a datacube and appends them to the existing datacube + + :param datacube: input data cube + :param index_dict: a dictionary that contains the input- and output range of the 
collection on which you calculate the indices + as well as the indices that you want to calculate with their responding input- and output ranges + It follows the following format:: + + { + "collection": { + "input_range": [0,8000], + "output_range": [0,250] + }, + "indices": { + "NDVI": { + "input_range": [-1,1], + "output_range": [0,250] + }, + } + } + + See `list_indices()` for supported indices. + + :param variable_map: (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names. + To be specified if the given data cube has non-standard band names, + or the satellite platform can not be recognized from the data cube metadata. + See :ref:`spectral_indices_manual_band_mapping` for more information. + :param platform: optionally specify the satellite platform (to determine band name mapping) + if the given data cube has no or an unhandled collection id in its metadata. + See :ref:`spectral_indices_manual_band_mapping` for more information. + + :return: data cube with appended indices + + .. warning:: this "rescaled" index helper uses an experimental API (e.g. `index_dict` argument) that is subject to change. + + .. versionadded:: 0.26.0 + Added `variable_map` and `platform` arguments. + """ + return compute_and_rescale_indices( + datacube=datacube, index_dict=index_dict, append=True, variable_map=variable_map, platform=platform + ) + + +def compute_indices( + datacube: DataCube, + indices: List[str], + *, + append: bool = False, + variable_map: Optional[Dict[str, str]] = None, + platform: Optional[str] = None, +) -> DataCube: + """ + Compute multiple spectral indices from the given data cube. + + :param datacube: input data cube + :param indices: list of names of the indices to compute and append. See `list_indices()` for supported indices. 
+ :param append: append the indices as bands to the given data cube + instead of creating a new cube with only the calculated indices + :param variable_map: (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names. + To be specified if the given data cube has non-standard band names, + or the satellite platform can not be recognized from the data cube metadata. + See :ref:`spectral_indices_manual_band_mapping` for more information. + :param platform: optionally specify the satellite platform (to determine band name mapping) + if the given data cube has no or an unhandled collection id in its metadata. + See :ref:`spectral_indices_manual_band_mapping` for more information. + + :return: data cube containing the indices as bands + + .. versionadded:: 0.26.0 + Added `variable_map` and `platform` arguments. + """ + # TODO: it's bit weird to have to specify all these None's in this structure + index_dict = { + "collection": { + "input_range": None, + "output_range": None, + }, + "indices": {index: {"input_range": None, "output_range": None} for index in indices}, + } + return compute_and_rescale_indices( + datacube=datacube, index_dict=index_dict, append=append, variable_map=variable_map, platform=platform + ) + + +def append_indices( + datacube: DataCube, + indices: List[str], + *, + variable_map: Optional[Dict[str, str]] = None, + platform: Optional[str] = None, +) -> DataCube: + """ + Compute multiple spectral indices and append them to the given data cube. + + :param datacube: input data cube + :param indices: list of names of the indices to compute and append. See `list_indices()` for supported indices. + :param variable_map: (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names. + To be specified if the given data cube has non-standard band names, + or the satellite platform can not be recognized from the data cube metadata. 
+ See :ref:`spectral_indices_manual_band_mapping` for more information. + :param platform: optionally specify the satellite platform (to determine band name mapping) + if the given data cube has no or an unhandled collection id in its metadata. + See :ref:`spectral_indices_manual_band_mapping` for more information. + + :return: data cube with appended indices + + .. versionadded:: 0.26.0 + Added `variable_map` and `platform` arguments. + """ + + return compute_indices( + datacube=datacube, indices=indices, append=True, variable_map=variable_map, platform=platform + ) + + +def compute_index( + datacube: DataCube, index: str, *, variable_map: Optional[Dict[str, str]] = None, platform: Optional[str] = None +) -> DataCube: + """ + Compute a single spectral index from a data cube. + + :param datacube: input data cube + :param index: name of the index to compute. See `list_indices()` for supported indices. + :param variable_map: (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names. + To be specified if the given data cube has non-standard band names, + or the satellite platform can not be recognized from the data cube metadata. + See :ref:`spectral_indices_manual_band_mapping` for more information. + :param platform: optionally specify the satellite platform (to determine band name mapping) + if the given data cube has no or an unhandled collection id in its metadata. + See :ref:`spectral_indices_manual_band_mapping` for more information. + + :return: data cube containing the index as band + + .. versionadded:: 0.26.0 + Added `variable_map` and `platform` arguments. + """ + # TODO: option to compute the index with `reduce_dimension` instead of `apply_dimension`? 
+ return compute_indices( + datacube=datacube, indices=[index], append=False, variable_map=variable_map, platform=platform + ) + + +def append_index( + datacube: DataCube, index: str, *, variable_map: Optional[Dict[str, str]] = None, platform: Optional[str] = None +) -> DataCube: + """ + Compute a single spectral index and append it to the given data cube. + + :param cube: input data cube + :param index: name of the index to compute and append. See `list_indices()` for supported indices. + :param variable_map: (optional) mapping from Awesome Spectral Indices formula variable to actual cube band names. + To be specified if the given data cube has non-standard band names, + or the satellite platform can not be recognized from the data cube metadata. + See :ref:`spectral_indices_manual_band_mapping` for more information. + :param platform: optionally specify the satellite platform (to determine band name mapping) + if the given data cube has no or an unhandled collection id in its metadata. + See :ref:`spectral_indices_manual_band_mapping` for more information. + + :return: data cube with appended index + + .. versionadded:: 0.26.0 + Added `variable_map` and `platform` arguments. + """ + return compute_indices( + datacube=datacube, indices=[index], append=True, variable_map=variable_map, platform=platform + ) diff --git a/lib/openeo/internal/__init__.py b/lib/openeo/internal/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/openeo/internal/documentation.py b/lib/openeo/internal/documentation.py new file mode 100644 index 000000000..c7da614c4 --- /dev/null +++ b/lib/openeo/internal/documentation.py @@ -0,0 +1,60 @@ +""" +Utilities to build/automate/extend documentation +""" + +import collections +import inspect +import textwrap +from functools import partial +from typing import Callable, Optional, Tuple, TypeVar + +# TODO: give this a proper public API? 
# Registry of decorated callables, keyed by openEO process id.
# Each value is a list of ``(function, mode)`` tuples, in decoration order.
_process_registry = collections.defaultdict(list)


T = TypeVar("T", bound=Callable)


def openeo_process(f: Optional[T] = None, process_id: Optional[str] = None, mode: Optional[str] = None) -> T:
    """
    Decorator for function or method to associate it with a standard openEO process

    Usable both bare (``@openeo_process``) and parameterized
    (``@openeo_process(process_id=..., mode=...)``).
    The docstring of ``f`` is dedented and extended with a "seealso" block
    linking to the process on processes.openeo.org,
    and ``(f, mode)`` is appended to the module-level ``_process_registry`` under the process id.

    :param f: function or method (``None`` when used as parameterized decorator)
    :param process_id: openEO process_id (to be given when it can not be guessed from function name)
    :param mode: optional tag that is stored next to ``f`` in the registry (it is not interpreted here)
    :return: ``f`` itself (same object, extended docstring),
        or a decorator expecting ``f`` when called without ``f``
    """
    # TODO: include openEO version?
    # TODO: support non-standard/proposed/experimental?
    # TODO: handling of `mode` (or something alike): apply/reduce_dimension/... callback, (band) math operator, ...?
    # TODO: documentation test that "seealso" urls are valid
    # TODO: inject more references/metadata in __doc__
    if f is None:
        # Parameterized decorator call: forward *all* options.
        # Leaving out `mode` here would silently register `None` instead of the requested mode.
        return partial(openeo_process, process_id=process_id, mode=mode)

    # Fall back on the function name when no explicit process id is given.
    process_id = process_id or f.__name__
    url = f"https://processes.openeo.org/#{process_id}"
    seealso = f'.. seealso::\n openeo.org documentation on `process "{process_id}" <{url}>`_.'
    # `f.__doc__` can be None (no docstring): treat that as empty.
    f.__doc__ = textwrap.dedent(f.__doc__ or "") + "\n\n" + seealso

    _process_registry[process_id].append((f, mode))
    return f


def openeo_endpoint(endpoint: str) -> Callable[[Callable], Callable]:
    """
    Parameterized decorator to annotate given function or method with the openEO endpoint it interacts with

    :param endpoint: REST endpoint (e.g. "GET /jobs", "POST /result", ...)
    :return:
    """
    # TODO: automatically parse/normalize endpoint (to method+path)
    # TODO: wrap this in some markup/directive to make this more a "small print" note.
+ + def decorate(f: Callable) -> Callable: + is_method = list(inspect.signature(f).parameters.keys())[:1] == ["self"] + seealso = f"This {'method' if is_method else 'function'} uses openEO endpoint ``{endpoint}``" + f.__doc__ = textwrap.dedent(f.__doc__ or "") + "\n\n" + seealso + "\n" + return f + + return decorate diff --git a/lib/openeo/internal/graph_building.py b/lib/openeo/internal/graph_building.py new file mode 100644 index 000000000..6f5918ea2 --- /dev/null +++ b/lib/openeo/internal/graph_building.py @@ -0,0 +1,476 @@ +""" +Internal openEO process graph building utilities +'''''''''''''''''''''''''''''''''''''''''''''''''' + +Internal functionality for abstracting, building, manipulating and processing openEO process graphs. + +""" + +from __future__ import annotations + +import abc +import collections +import copy +import json +import sys +from contextlib import nullcontext +from pathlib import Path +from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union + +from openeo.api.process import Parameter +from openeo.internal.process_graph_visitor import ( + ProcessGraphUnflattener, + ProcessGraphVisitException, + ProcessGraphVisitor, +) +from openeo.util import dict_no_none, load_json_resource + + +class FlatGraphableMixin(metaclass=abc.ABCMeta): + """ + Mixin for classes that can be exported/converted to + a "flat graph" representation of an openEO process graph. + """ + + @abc.abstractmethod + def flat_graph(self) -> Dict[str, dict]: + ... + + def to_json(self, *, indent: Union[int, None] = 2, separators: Optional[Tuple[str, str]] = None) -> str: + """ + Get interoperable JSON representation of the process graph. + + See :py:meth:`DataCube.print_json` to directly print the JSON representation + and :ref:`process_graph_export` for more usage information. + + Also see ``json.dumps`` docs for more information on the JSON formatting options. + + :param indent: JSON indentation level. 
+ :param separators: (optional) tuple of item/key separators. + :return: JSON string + """ + pg = {"process_graph": self.flat_graph()} + return json.dumps(pg, indent=indent, separators=separators) + + def print_json( + self, + *, + file=None, + indent: Union[int, None] = 2, + separators: Optional[Tuple[str, str]] = None, + end: str = "\n", + ): + """ + Print interoperable JSON representation of the process graph. + + See :py:meth:`DataCube.to_json` to get the JSON representation as a string + and :ref:`process_graph_export` for more usage information. + + Also see ``json.dumps`` docs for more information on the JSON formatting options. + + :param file: file-like object (stream) to print to (current ``sys.stdout`` by default). + Or a path (string or pathlib.Path) to a file to write to. + :param indent: JSON indentation level. + :param separators: (optional) tuple of item/key separators. + :param end: additional string to be printed at the end (newline by default). + + .. versionadded:: 0.12.0 + + .. versionadded:: 0.23.0 + added the ``end`` argument. + """ + pg = {"process_graph": self.flat_graph()} + if isinstance(file, (str, Path)): + # Create (new) file and automatically close it + file_ctx = Path(file).open("w", encoding="utf8") + else: + # Just use file as-is, but don't close it automatically. + file_ctx = nullcontext(enter_result=file or sys.stdout) + with file_ctx as f: + json.dump(pg, f, indent=indent, separators=separators) + if end: + f.write(end) + + +class _FromNodeMixin(abc.ABC): + """Mixin for classes that want to hook into the generation of a "from_node" reference.""" + + @abc.abstractmethod + def from_node(self) -> PGNode: + # TODO: "from_node" is a bit a confusing name: + # it refers to the "from_node" node reference in openEO process graphs, + # but as a method name here it reads like "construct from PGNode", + # while it is actually meant as "export as PGNode" (that can be used in a "from_node" reference). 
+ pass + + +class PGNode(_FromNodeMixin, FlatGraphableMixin): + """ + A process node in a process graph: has at least a process_id and arguments. + + Note that a full openEO "process graph" is essentially a directed acyclic graph of nodes + pointing to each other. A full process graph is practically equivalent with its "result" node, + as it points (directly or indirectly) to all the other nodes it depends on. + + .. warning:: + This class is an implementation detail meant for internal use. + It is not recommended for general use in normal user code. + Instead, use process graph abstraction builders like + :py:meth:`Connection.load_collection() `, + :py:meth:`Connection.datacube_from_process() `, + :py:meth:`Connection.datacube_from_flat_graph() `, + :py:meth:`Connection.datacube_from_json() `, + :py:meth:`Connection.load_ml_model() `, + :py:func:`openeo.processes.process()`, + + """ + + __slots__ = ["_process_id", "_arguments", "_namespace"] + + def __init__(self, process_id: str, arguments: dict = None, namespace: Union[str, None] = None, **kwargs): + self._process_id = process_id + # Merge arguments dict and kwargs + arguments = dict(**(arguments or {}), **kwargs) + # Make sure direct PGNode arguments are properly wrapped in a "from_node" dict + for arg, value in arguments.items(): + if isinstance(value, _FromNodeMixin): + arguments[arg] = {"from_node": value.from_node()} + elif isinstance(value, list): + for index, arrayelement in enumerate(value): + if isinstance(arrayelement, _FromNodeMixin): + value[index] = {"from_node": arrayelement.from_node()} + # TODO: use a frozendict of some sort to ensure immutability? 
+ self._arguments = arguments + self._namespace = namespace + + def from_node(self): + return self + + def __repr__(self): + return "<{c} {p!r} at 0x{m:x}>".format(c=self.__class__.__name__, p=self.process_id, m=id(self)) + + @property + def process_id(self) -> str: + return self._process_id + + @property + def arguments(self) -> dict: + return self._arguments + + @property + def namespace(self) -> Union[str, None]: + return self._namespace + + def update_arguments(self, **kwargs): + """ + Add/Update arguments of the process node. + + .. versionadded:: 0.10.1 + """ + self._arguments = {**self._arguments, **kwargs} + + def _as_tuple(self): + return (self._process_id, self._arguments, self._namespace) + + def __eq__(self, other): + return isinstance(other, type(self)) and self._as_tuple() == other._as_tuple() + + def to_dict(self) -> dict: + """ + Convert process graph to a nested dictionary structure. + Uses deep copy style: nodes that are reused in graph will be deduplicated + """ + + def _deep_copy(x): + """PGNode aware deep copy helper""" + if isinstance(x, PGNode): + return dict_no_none(process_id=x.process_id, arguments=_deep_copy(x.arguments), namespace=x.namespace) + if isinstance(x, Parameter): + return {"from_parameter": x.name} + elif isinstance(x, dict): + return {str(k): _deep_copy(v) for k, v in x.items()} + elif isinstance(x, (list, tuple)): + return type(x)(_deep_copy(v) for v in x) + elif isinstance(x, (str, int, float)) or x is None: + return x + else: + raise ValueError(repr(x)) + + return _deep_copy(self) + + def flat_graph(self) -> Dict[str, dict]: + """Get the process graph in internal flat dict representation.""" + return GraphFlattener().flatten(node=self) + + @staticmethod + def to_process_graph_argument(value: Union["PGNode", str, dict]) -> dict: + """ + Normalize given argument properly to a "process_graph" argument + to be used as reducer/subprocess for processes like + ``reduce_dimension``, ``aggregate_spatial``, ``apply``, 
``merge_cubes``, ``resample_cube_temporal`` + """ + if isinstance(value, str): + # assume string with predefined reduce/apply process ("mean", "sum", ...) + # TODO: is this case still used? It's invalid anyway for 1.0 openEO spec I think? + return value + elif isinstance(value, PGNode): + return {"process_graph": value} + elif isinstance(value, dict) and isinstance(value.get("process_graph"), PGNode): + return value + else: + raise ValueError(value) + + @staticmethod + def from_flat_graph(flat_graph: dict, parameters: Optional[dict] = None) -> PGNode: + """Unflatten a given flat dict representation of a process graph and return result node.""" + return PGNodeGraphUnflattener.unflatten(flat_graph=flat_graph, parameters=parameters) + + + def walk_nodes(self) -> Iterator[PGNode]: + """Walk this node and all it's parents""" + # TODO: option to do deep walk (walk through child graphs too)? + yield self + + def walk(x) -> Iterator[PGNode]: + if isinstance(x, PGNode): + yield from x.walk_nodes() + elif isinstance(x, dict): + for v in x.values(): + yield from walk(v) + elif isinstance(x, (list, tuple)): + for v in x: + yield from walk(v) + + yield from walk(self.arguments) + + +def as_flat_graph(x: Union[dict, FlatGraphableMixin, Path, List[FlatGraphableMixin], Any]) -> Dict[str, dict]: + """ + Convert given object to a internal flat dict graph representation. + """ + # TODO: document or verify which process graph flavor this is: + # including `{"process": {"process_graph": {nodes}}` ("process graph with metadata") + # including `{"process_graph": {nodes}}` ("process graph") + # or just the raw process graph nodes? + if isinstance(x, dict): + # Assume given dict is already a flat graph representation + return x + elif isinstance(x, FlatGraphableMixin): + return x.flat_graph() + elif isinstance(x, (str, Path)): + # Assume a JSON resource (raw JSON, path to local file, JSON url, ...) 
+ return load_json_resource(x) + elif isinstance(x, (list, tuple)) and all(isinstance(i, FlatGraphableMixin) for i in x): + return MultiLeafGraph(x).flat_graph() + raise ValueError(x) + + +class ReduceNode(PGNode): + """ + A process graph node for "reduce" processes (has a reducer sub-process-graph) + """ + + def __init__( + self, + data: _FromNodeMixin, + reducer: Union[PGNode, str, dict], + dimension: str, + context=None, + process_id="reduce_dimension", + band_math_mode: bool = False, + ): + assert process_id in ("reduce_dimension", "reduce_dimension_binary") + arguments = { + "data": data, + "reducer": self.to_process_graph_argument(reducer), + "dimension": dimension, + } + if context is not None: + arguments["context"] = context + super().__init__(process_id=process_id, arguments=arguments) + # TODO #123 is it (still) necessary to make "band" math a special case? + self.band_math_mode = band_math_mode + + @property + def dimension(self): + return self.arguments["dimension"] + + def reducer_process_graph(self) -> PGNode: + return self.arguments["reducer"]["process_graph"] + + def clone_with_new_reducer(self, reducer: PGNode) -> ReduceNode: + """Copy/clone this reduce node: keep input reference, but use new reducer""" + return ReduceNode( + data=self.arguments["data"]["from_node"], + reducer=reducer, + dimension=self.arguments["dimension"], + band_math_mode=self.band_math_mode, + context=self.arguments.get("context"), + ) + + +class FlatGraphNodeIdGenerator: + """ + Helper class to generate unique node ids (e.g. autoincrement style) + for processes in a flat process graph. 
+ """ + + def __init__(self): + self._counters = collections.defaultdict(int) + + def generate(self, process_id: str): + """Generate new key for given process id.""" + self._counters[process_id] += 1 + return "{p}{c}".format(p=process_id.replace("_", ""), c=self._counters[process_id]) + + +class GraphFlattener(ProcessGraphVisitor): + + def __init__(self, node_id_generator: FlatGraphNodeIdGenerator = None, multi_input_mode: bool = False): + super().__init__() + self._node_id_generator = node_id_generator or FlatGraphNodeIdGenerator() + self._last_node_id = None + self._flattened: Dict[str, dict] = {} + self._argument_stack = [] + self._node_cache = {} + self._multi_input_mode = multi_input_mode + + def flatten(self, node: PGNode) -> Dict[str, dict]: + """Consume given nested process graph and return flat dict representation""" + if self._flattened and not self._multi_input_mode: + raise RuntimeError("Flattening multiple graphs, but not in multi-input mode") + self.accept_node(node) + assert len(self._argument_stack) == 0 + return self.flattened(set_result_flag=not self._multi_input_mode) + + def flattened(self, set_result_flag: bool = True) -> Dict[str, dict]: + flat_graph = copy.deepcopy(self._flattened) + if set_result_flag: + # TODO #583 an "end" node is not necessarily a "result" node + flat_graph[self._last_node_id]["result"] = True + return flat_graph + + def accept_node(self, node: PGNode): + # Process reused nodes only first time and remember node id. 
+ node_id = id(node) + if node_id not in self._node_cache: + super()._accept_process(process_id=node.process_id, arguments=node.arguments, namespace=node.namespace) + self._node_cache[node_id] = self._last_node_id + else: + self._last_node_id = self._node_cache[node_id] + + def enterProcess(self, process_id: str, arguments: dict, namespace: Union[str, None]): + self._argument_stack.append({}) + + def leaveProcess(self, process_id: str, arguments: dict, namespace: Union[str, None]): + node_id = self._node_id_generator.generate(process_id) + self._flattened[node_id] = dict_no_none( + process_id=process_id, + arguments=self._argument_stack.pop(), + namespace=namespace, + ) + self._last_node_id = node_id + + def _store_argument(self, argument_id: str, value): + if isinstance(value, Parameter): + value = {"from_parameter": value.name} + self._argument_stack[-1][argument_id] = value + + def _store_array_element(self, value): + if isinstance(value, Parameter): + value = {"from_parameter": value.name} + self._argument_stack[-1].append(value) + + def enterArray(self, argument_id: str): + array = [] + self._store_argument(argument_id, array) + self._argument_stack.append(array) + + def leaveArray(self, argument_id: str): + self._argument_stack.pop() + + def arrayElementDone(self, value): + self._store_array_element(self._flatten_argument(value)) + + def constantArrayElement(self, value): + self._store_array_element(self._flatten_argument(value)) + + def _flatten_argument(self, value): + if isinstance(value, dict): + if "from_node" in value: + value = {"from_node": self._last_node_id} + elif "process_graph" in value: + pg = value["process_graph"] + if isinstance(pg, PGNode): + value = {"process_graph": GraphFlattener(node_id_generator=self._node_id_generator).flatten(pg)} + elif isinstance(pg, dict): + # Assume it is already a valid flat graph representation of a subprocess + value = {"process_graph": pg} + else: + raise ValueError(pg) + else: + value = {k: 
self._flatten_argument(v) for k, v in value.items()} + elif isinstance(value, Parameter): + value = {"from_parameter": value.name} + return value + + def leaveArgument(self, argument_id: str, value): + self._store_argument(argument_id, self._flatten_argument(value)) + + def constantArgument(self, argument_id: str, value): + self._store_argument(argument_id, value) + + +class PGNodeGraphUnflattener(ProcessGraphUnflattener): + """ + Unflatten a flat process graph to a graph of :py:class:`PGNode` objects + + Parameter substitution can also be performed, but is optional: + if the ``parameters=None`` is given, no parameter substitution is done, + if it is a dictionary (even an empty one) is given, every parameter encountered in the process + graph must have an entry for substitution. + """ + + def __init__(self, flat_graph: dict, parameters: Optional[dict] = None): + super().__init__(flat_graph=flat_graph) + self._parameters = parameters + + def _process_node(self, node: dict) -> PGNode: + return PGNode( + process_id=node["process_id"], + arguments=self._process_value(value=node["arguments"]), + namespace=node.get("namespace"), + ) + + def _process_from_node(self, key: str, node: dict) -> PGNode: + return self.get_node(key=key) + + def _process_from_parameter(self, name: str) -> Any: + if self._parameters is None: + return super()._process_from_parameter(name=name) + if name not in self._parameters: + raise ProcessGraphVisitException("No substitution value for parameter {p!r}.".format(p=name)) + return self._parameters[name] + + +class MultiLeafGraph(FlatGraphableMixin): + """ + Container for process graphs with multiple leaf/result nodes. 
+ """ + + __slots__ = ["_leaves"] + + def __init__(self, leaves: Iterable[FlatGraphableMixin]): + self._leaves = list(leaves) + + def flat_graph(self) -> Dict[str, dict]: + flattener = GraphFlattener(multi_input_mode=True) + for leaf in self._leaves: + if isinstance(leaf, PGNode): + flattener.flatten(leaf) + elif isinstance(leaf, _FromNodeMixin): + flattener.flatten(leaf.from_node()) + else: + raise ValueError(f"Unsupported type {type(leaf)}") + + return flattener.flattened(set_result_flag=True) diff --git a/lib/openeo/internal/jupyter.py b/lib/openeo/internal/jupyter.py new file mode 100644 index 000000000..891e7e50a --- /dev/null +++ b/lib/openeo/internal/jupyter.py @@ -0,0 +1,173 @@ +import json +import os + +from openeo.rest import OpenEoApiError + +SCRIPT_URL = "https://cdn.jsdelivr.net/npm/@openeo/vue-components@2/assets/openeo.min.js" +COMPONENT_MAP = { + "collection": "data", + "data-table": "data", + "file-format": "format", + "file-formats": "formats", + "item": "data", + "job-estimate": "estimate", + "model-builder": "value", + "service-type": "service", + "service-types": "services", + "udf-runtime": "runtime", + "udf-runtimes": "runtimes", +} + +TABLE_COLUMNS = { + "jobs": { + "id": { + "name": "ID", + "primaryKey": True, + }, + "title": { + "name": "Title", + }, + "status": { + "name": "Status", + # 'stylable': True + }, + "created": { + "name": "Submitted", + "format": "Timestamp", + "sort": "desc", + }, + "updated": { + "name": "Last update", + "format": "Timestamp", + }, + }, + "services": { + "id": { + "name": "ID", + "primaryKey": True, + }, + "title": { + "name": "Title", + }, + "type": { + "name": "Type", + # 'format': value => typeof value === 'string' ? 
value.toUpperCase() : value, + }, + "enabled": { + "name": "Enabled", + }, + "created": { + "name": "Submitted", + "format": "Timestamp", + "sort": "desc", + }, + }, + "files": { + "path": { + "name": "Path", + "primaryKey": True, + # 'sortFn': Utils.sortByPath, + "sort": "asc", + }, + "size": { + "name": "Size", + "format": "FileSize", + "filterable": False, + }, + "modified": { + "name": "Last modified", + "format": "Timestamp", + }, + }, +} + + +def in_jupyter_context() -> bool: + """Check if we are running in an interactive Jupyter notebook context.""" + try: + from ipykernel.zmqshell import ZMQInteractiveShell + from IPython.core.getipython import get_ipython + except ImportError: + return False + return isinstance(get_ipython(), ZMQInteractiveShell) + + +def render_component(component: str, data=None, parameters: dict = None): + parameters = parameters or {} + # Special handling for batch job results, show either item or collection depending on the data + if component == "batch-job-result": + component = "item" if data["type"] == "Feature" else "collection" + + if component == "data-table": + parameters["columns"] = TABLE_COLUMNS[parameters["columns"]] + elif component in ["collection", "collections", "item", "items"]: + url = os.environ.get("OPENEO_BASEMAP_URL") + attribution = os.environ.get("OPENEO_BASEMAP_ATTRIBUTION") + parameters["mapOptions"] = {} + if url: + parameters["mapOptions"]["basemap"] = url + if attribution: + parameters["mapOptions"]["attribution"] = attribution + + # Set the data as the corresponding parameter in the Vue components + key = COMPONENT_MAP.get(component, component) + if data is not None: + if isinstance(data, list): + # TODO: make this `to_dict` usage more explicit with an internal API? 
+ data = [(x.to_dict() if hasattr(x, "to_dict") else x) for x in data] + parameters[key] = data + + # Construct HTML, load Vue Components source files only if the openEO HTML tag is not yet defined + return """ + + + + + """.format( + script=SCRIPT_URL, component=component, props=json.dumps(parameters) + ) + + +def render_error(error: OpenEoApiError): + # ToDo: Once we have a dedicated log/error component, use that instead of description + output = """## Error `{code}`\n\n{message}""".format(code=error.code, message=error.message) + return render_component("description", data=output) + + +# These classes are proxies to visualize openEO responses nicely in Jupyter +# To show the actual list or dict in Jupyter, use repr() or print() + + +class VisualDict(dict): + + def __init__(self, component: str, data: dict, parameters: dict = None): + dict.__init__(self, data) + self.component = component + self.parameters = parameters or {} + + def _repr_html_(self): + return render_component(self.component, self, self.parameters) + + +class VisualList(list): + + def __init__(self, component: str, data: list, parameters: dict = None): + list.__init__(self, data) + self.component = component + self.parameters = parameters or {} + + def _repr_html_(self): + return render_component(self.component, self, self.parameters) diff --git a/lib/openeo/internal/process_graph_visitor.py b/lib/openeo/internal/process_graph_visitor.py new file mode 100644 index 000000000..7c42c2202 --- /dev/null +++ b/lib/openeo/internal/process_graph_visitor.py @@ -0,0 +1,265 @@ +from __future__ import annotations + +import json +from abc import ABC +from typing import Any, Tuple, Union + +from openeo.internal.warnings import deprecated +from openeo.rest import OpenEoClientException + + +class ProcessGraphVisitException(OpenEoClientException): + pass + + +class ProcessGraphVisitor(ABC): + """ + Hierarchical Visitor for (nested) process graphs structures. 
+ """ + + def __init__(self): + self.process_stack = [] + + @classmethod + def dereference_from_node_arguments(cls, process_graph: dict) -> str: + """ + Walk through the given (flat) process graph and replace (in-place) "from_node" references in + process arguments (dictionaries or lists) with the corresponding resolved subgraphs + + :param process_graph: process graph dictionary to be manipulated in-place + :return: name of the "result" node of the graph + """ + + # TODO avoid manipulating process graph in place? make it more explicit? work on a copy? + # TODO call it more something like "unflatten"?. Split this off of ProcessGraphVisitor? + # TODO implement this through `ProcessGraphUnflattener` ? + + def resolve_from_node(process_graph, node, from_node): + if from_node not in process_graph: + raise ProcessGraphVisitException( + "from_node {f!r} (referenced by {n!r}) not in process graph.".format(f=from_node, n=node) + ) + return process_graph[from_node] + + result_node = None + for node, node_dict in process_graph.items(): + if node_dict.get("result", False): + if result_node: + raise ProcessGraphVisitException("Multiple result nodes: {a}, {b}".format(a=result_node, b=node)) + result_node = node + arguments = node_dict.get("arguments", {}) + for arg in arguments.values(): + if isinstance(arg, dict): + if "from_node" in arg: + arg["node"] = resolve_from_node(process_graph, node, arg["from_node"]) + else: + for k, v in arg.items(): + if isinstance(v, dict) and "from_node" in v: + v["node"] = resolve_from_node(process_graph, node, v["from_node"]) + elif isinstance(arg, list): + for i, element in enumerate(arg): + if isinstance(element, dict) and "from_node" in element: + arg[i] = resolve_from_node(process_graph, node, element["from_node"]) + + if result_node is None: + dump = json.dumps(process_graph, indent=2) + raise ProcessGraphVisitException("No result node in process graph: " + dump[:1000]) + return result_node + + def accept_process_graph(self, graph: dict) 
-> ProcessGraphVisitor: + """ + Traverse a (flat) process graph + + :param graph: + :return: + """ + # TODO: this is driver specific functionality, working on flattened graph structures. Make this more clear? + top_level_node = self.dereference_from_node_arguments(graph) + self.accept_node(graph[top_level_node]) + return self + + @deprecated(reason="Use accept_node() instead", version="0.4.6") + def accept(self, node: dict): + self.accept_node(node) + + def accept_node(self, node: dict): + pid = node["process_id"] + arguments = node.get("arguments", {}) + namespace = node.get("namespace", None) + self._accept_process(process_id=pid, arguments=arguments, namespace=namespace) + + def _accept_process(self, process_id: str, arguments: dict, namespace: Union[str, None]): + self.process_stack.append(process_id) + self.enterProcess(process_id=process_id, arguments=arguments, namespace=namespace) + for arg_id, value in sorted(arguments.items()): + if isinstance(value, list): + self.enterArray(argument_id=arg_id) + self._accept_argument_list(value) + self.leaveArray(argument_id=arg_id) + elif isinstance(value, dict): + self.enterArgument(argument_id=arg_id, value=value) + self._accept_argument_dict(value) + self.leaveArgument(argument_id=arg_id, value=value) + else: + self.constantArgument(argument_id=arg_id, value=value) + self.leaveProcess(process_id=process_id, arguments=arguments, namespace=namespace) + assert self.process_stack.pop() == process_id + + def _accept_argument_list(self, elements: list): + for element in elements: + if isinstance(element, dict): + self._accept_argument_dict(element) + self.arrayElementDone(element) + else: + self.constantArrayElement(element) + + def _accept_argument_dict(self, value: dict): + if "node" in value and "from_node" in value: + # TODO: this looks bit weird (or at least very specific). 
+ self.accept_node(value["node"]) + elif value.get("from_node"): + self.accept_node(value["from_node"]) + elif "process_id" in value: + self.accept_node(value) + elif "from_parameter" in value: + self.from_parameter(value["from_parameter"]) + else: + self._accept_dict(value) + + def _accept_dict(self, value: dict): + pass + + def from_parameter(self, parameter_id: str): + pass + + def enterProcess(self, process_id: str, arguments: dict, namespace: Union[str, None]): + pass + + def leaveProcess(self, process_id: str, arguments: dict, namespace: Union[str, None]): + pass + + def enterArgument(self, argument_id: str, value): + pass + + def leaveArgument(self, argument_id: str, value): + pass + + def constantArgument(self, argument_id: str, value): + pass + + def enterArray(self, argument_id: str): + pass + + def leaveArray(self, argument_id: str): + pass + + def constantArrayElement(self, value): + pass + + def arrayElementDone(self, value: dict): + pass + + +def find_result_node(flat_graph: dict) -> Tuple[str, dict]: + """ + Find result node in flat graph + + :return: tuple with node id (str) and node dictionary of the result node. + """ + result_nodes = [(key, node) for (key, node) in flat_graph.items() if node.get("result")] + + if len(result_nodes) == 1: + return result_nodes[0] + elif len(result_nodes) == 0: + raise ProcessGraphVisitException("Found no result node in flat process graph") + else: + keys = [k for (k, n) in result_nodes] + raise ProcessGraphVisitException( + "Found multiple result nodes in flat process graph: {keys!r}".format(keys=keys) + ) + + +class ProcessGraphUnflattener: + """ + Base class to process a flat graph representation of a process graph + and unflatten it by resolving the "from_node" references. + Subclassing and overriding certain methods allows to build a desired unflattened graph structure. + """ + + # Sentinel object for flagging a node "under construction" and detect graph cycles. 
+ _UNDER_CONSTRUCTION = object() + + def __init__(self, flat_graph: dict): + self._flat_graph = flat_graph + self._nodes = {} + + @classmethod + def unflatten(cls, flat_graph: dict, **kwargs): + """Class method helper to unflatten given flat process graph""" + return cls(flat_graph=flat_graph, **kwargs).process() + + def process(self): + """Process the flat process graph: unflatten it.""" + result_key, result_node = find_result_node(flat_graph=self._flat_graph) + return self.get_node(result_key) + + def get_node(self, key: str) -> Any: + """Get processed node by node key.""" + if key not in self._nodes: + self._nodes[key] = self._UNDER_CONSTRUCTION + node = self._process_node(self._flat_graph[key]) + self._nodes[key] = node + elif self._nodes[key] is self._UNDER_CONSTRUCTION: + raise ProcessGraphVisitException("Cycle in process graph") + return self._nodes[key] + + def _process_node(self, node: dict) -> Any: + """ + Overridable: generate process graph node from flat_graph data. + """ + # Default implementation: basic validation/whitelisting, and only traverse arguments + return dict( + process_id=node["process_id"], + arguments=self._process_value(value=node["arguments"]), + **{k: node[k] for k in ["namespace", "description", "result"] if k in node}, + ) + + def _process_from_node(self, key: str, node: dict) -> Any: + """ + Overridable: generate a node from a flat_graph "from_node" reference + """ + # Default/original implementation: keep "from_node" key and add resolved node under "node" key. 
+ # TODO: just return `self.get_node(key=key)` + return {"from_node": key, "node": self.get_node(key=key)} + + def _process_from_parameter(self, name: str) -> Any: + """ + Overridable: generate a node from a flat_graph "from_parameter" reference + """ + # Default implementation: + return {"from_parameter": name} + + def _resolve_from_node(self, key: str) -> dict: + if key not in self._flat_graph: + raise ProcessGraphVisitException("from_node reference {k!r} not found in process graph".format(k=key)) + return self._flat_graph[key] + + def _process_value(self, value) -> Any: + if isinstance(value, dict): + if "from_node" in value: + key = value["from_node"] + node = self._resolve_from_node(key=key) + return self._process_from_node(key=key, node=node) + elif "from_parameter" in value: + name = value["from_parameter"] + return self._process_from_parameter(name=name) + elif "process_graph" in value: + # Don't traverse child process graphs + # TODO: should/can we? Can we know available parameters for validation, or do we skip validation? 
+ return value + else: + return {k: self._process_value(v) for (k, v) in value.items()} + elif isinstance(value, (list, tuple)): + return [self._process_value(v) for v in value] + else: + return value diff --git a/lib/openeo/internal/processes/__init__.py b/lib/openeo/internal/processes/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/openeo/internal/processes/builder.py b/lib/openeo/internal/processes/builder.py new file mode 100644 index 000000000..a2a156eb3 --- /dev/null +++ b/lib/openeo/internal/processes/builder.py @@ -0,0 +1,120 @@ +import inspect +import logging +import warnings +from typing import Any, Callable, Dict, List, Optional, Union + +from openeo.internal.graph_building import FlatGraphableMixin, PGNode, _FromNodeMixin +from openeo.rest import OpenEoClientException + +UNSET = object() +_log = logging.getLogger(__name__) + + +def _to_pgnode_data(value: Any) -> Union[PGNode, dict, Any]: + """Convert given value to valid process graph material""" + if isinstance(value, ProcessBuilderBase): + return value.pgnode + elif isinstance(value, list): + return [_to_pgnode_data(item) for item in value] + elif isinstance(value, Callable): + pg = convert_callable_to_pgnode(value) + return PGNode.to_process_graph_argument(pg) + else: + # Fallback: assume value is valid process graph material already. + return value + + +class ProcessBuilderBase(_FromNodeMixin, FlatGraphableMixin): + """ + Base implementation of a builder pattern that allows constructing process graphs + by calling functions. + """ + + # TODO: can this implementation be merged with PGNode directly? + + def __init__(self, pgnode: Union[PGNode, dict, list]): + self.pgnode = pgnode + + @classmethod + def process(cls, process_id: str, arguments: dict = None, namespace: Union[str, None] = None, **kwargs): + """ + Apply process, using given arguments + + :param process_id: process id of the process. + :param arguments: argument dictionary for the process. 
+ :param namespace: process namespace (only necessary to specify for non-predefined or non-user-defined processes) + :return: new ProcessBuilder instance + """ + arguments = {**(arguments or {}), **kwargs} + arguments = {k: _to_pgnode_data(v) for k, v in arguments.items() if v is not UNSET} + return cls(PGNode(process_id=process_id, arguments=arguments, namespace=namespace)) + + def flat_graph(self) -> Dict[str, dict]: + """Get the process graph in internal flat dict representation.""" + return self.pgnode.flat_graph() + + def from_node(self) -> PGNode: + # _FromNodeMixin API + return self.pgnode + + +def get_parameter_names(process: Callable) -> List[str]: + """Get argument (aka parameter) names of given function/callable.""" + signature = inspect.signature(process) + return [ + p.name + for p in signature.parameters.values() + if p.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD) + ] + + +def convert_callable_to_pgnode(callback: Callable, parent_parameters: Optional[List[str]] = None) -> PGNode: + """ + Convert given process callback to a PGNode. + + >>> result = convert_callable_to_pgnode(lambda x: x + 5) + >>> assert isinstance(result, PGNode) + >>> result.flat_graph() + {"add1": {"process_id": "add", "arguments": {"x": {"from_parameter": "x"}, "y": 5}, "result": True}} + + """ + # TODO: eliminate local import (due to circular dependency)? + from openeo.processes import ProcessBuilder + + process_params = get_parameter_names(callback) + if parent_parameters is None: + # Due to lack of parent parameter information, + # we blindly use all callback's argument names as parameter names + # TODO #426: Instead of guessing: extract expected parent_parameters, e.g. based on parent process_id? 
+ message = f"Blindly using callback parameter names from {callback!r} argument names: {process_params!r}" + if tuple(process_params) not in {(), ("x",), ("data",), ("x", "y")}: + warnings.warn(message) + else: + _log.info(message) + kwargs = {p: ProcessBuilder({"from_parameter": p}) for p in process_params} + elif parent_parameters == ["x", "y"] and (len(process_params) == 1 or process_params[:1] == ["data"]): + # Special case: wrap all parent parameters in an array + kwargs = {process_params[0]: ProcessBuilder([{"from_parameter": p} for p in parent_parameters])} + else: + # Check for direct correspondence between callback arguments and parent parameters (or subset thereof). + common = set(parent_parameters).intersection(process_params) + if common: + kwargs = {p: ProcessBuilder({"from_parameter": p}) for p in common} + elif min(len(parent_parameters), len(process_params)) == 0: + kwargs = {} + elif min(len(parent_parameters), len(process_params)) == 1: + # Fallback for common case of just one callback argument (pass the main parameter), + # or one parent parameter (just pass that one) + kwargs = {process_params[0]: ProcessBuilder({"from_parameter": parent_parameters[0]})} + else: + raise OpenEoClientException( + f"Callback argument mismatch: expected (prefix of) {parent_parameters}, but found found {process_params!r}" + ) + + # "Evaluate" the callback, which should give a ProcessBuilder again to extract pgnode from + result = callback(**kwargs) + if not isinstance(result, ProcessBuilderBase): + raise OpenEoClientException( + f"Callback {callback} did not evaluate to ProcessBuilderBase. 
Got {result!r} instead" + ) + return result.pgnode diff --git a/lib/openeo/internal/processes/generator.py b/lib/openeo/internal/processes/generator.py new file mode 100644 index 000000000..ee91d18b9 --- /dev/null +++ b/lib/openeo/internal/processes/generator.py @@ -0,0 +1,305 @@ +import argparse +import datetime +import keyword +import sys +import textwrap +from pathlib import Path +from typing import Iterator, List, Optional, Union + +from openeo.internal.processes.parse import Process, parse_all_from_dir + + +class PythonRenderer: + """Generator of Python function source code for a given openEO process""" + + DEFAULT_WIDTH = 115 + + def __init__( + self, + oo_mode: bool = False, + indent: str = " ", + body_template: str = "return _process({id!r}, {args})", + optional_default="None", + return_type_hint: Optional[str] = None, + decorator: Optional[str] = None, + ): + self.oo_mode = oo_mode + self.indent = indent + self.body_template = body_template + self.optional_default = optional_default + self.return_type_hint = return_type_hint + self.decorator = decorator + + def render_process(self, process: Process, prefix: str = None, width: int = DEFAULT_WIDTH) -> str: + if prefix is None: + prefix = " " if self.oo_mode else "" + + # TODO: add type hints + # TODO: width limit? 
+ def_line = "def {id}({args}){th}:".format( + id=self._safe_name(process.id), + args=", ".join(self._def_arguments(process)), + th=" -> {t}".format(t=self.return_type_hint) if self.return_type_hint else "", + ) + + call_args = ", ".join(self._call_args(process)) + if len(call_args) > width: + # TODO: also include `id` placeholder in `self.body_format` + call_args = ( + "\n" + ",\n".join(self.indent + self.indent + a for a in self._call_args(process)) + "\n" + self.indent + ) + body = self.indent + self.body_template.format( + id=process.id, safe_name=self._safe_name(process.id), args=call_args + ) + + lines = ([self.decorator] if self.decorator else []) + [ + def_line, + self.render_docstring(process, width=width - len(prefix), prefix=self.indent), + body, + ] + return textwrap.indent("\n".join(lines), prefix=prefix) + + def _safe_name(self, name: str) -> str: + if keyword.iskeyword(name): + name += "_" + return name + + def _par_names(self, process: Process) -> List[str]: + """Names of the openEO process parameters""" + return [self._safe_name(p.name) for p in process.parameters] + + def _arg_names(self, process: Process) -> List[str]: + """Names of the arguments in the python function""" + arg_names = self._par_names(process) + if self.oo_mode and arg_names: + arg_names[0] = "self" + return arg_names + + def _call_args(self, process: Process) -> Iterator[str]: + for parameter, par_name, arg_name in zip( + process.parameters, self._par_names(process), self._arg_names(process) + ): + arg_expression = arg_name + if parameter.schema.is_process_graph(): + parent_parameters = [p["name"] for p in parameter.schema.schema["parameters"]] + arg_expression = f"build_child_callback({arg_expression}, parent_parameters={parent_parameters})" + if parameter.optional: + arg_expression = ( + f"({arg_expression} if {arg_name} not in [None, {self.optional_default}] else {arg_name})" + ) + yield f"{par_name}={arg_expression}" + + def _def_arguments(self, process: Process) -> 
def generate_process_py(processes: List[Process], output=sys.stdout, argv=None):
    """
    Write the source code of the generated ``processes.py`` module to ``output``.

    The generated module consists of a "do not edit" header, a ``ProcessBuilder``
    class (hand-written operator overloads from the template below, followed by one
    generated method per process) and one generated module-level function per process.

    :param processes: process definitions to render (rendered in the given order)
    :param output: writable text stream that receives the generated source
    :param argv: optional command line arguments, echoed in the header of the generated file
    """
    # Template for the object-oriented part: the operator overloads are static,
    # the per-process methods are appended in the loop below.
    oo_src = textwrap.dedent(
        """
        from __future__ import annotations

        import builtins

        from openeo.internal.documentation import openeo_process
        from openeo.internal.processes.builder import UNSET, ProcessBuilderBase
        from openeo.rest._datacube import build_child_callback


        class ProcessBuilder(ProcessBuilderBase):
            \"\"\"
            .. include:: api-processbuilder.rst
            \"\"\"

            _ITERATION_LIMIT = 100

            @openeo_process(process_id="add", mode="operator")
            def __add__(self, other) -> ProcessBuilder:
                return self.add(other)

            @openeo_process(process_id="add", mode="operator")
            def __radd__(self, other) -> ProcessBuilder:
                return add(other, self)

            @openeo_process(process_id="subtract", mode="operator")
            def __sub__(self, other) -> ProcessBuilder:
                return self.subtract(other)

            @openeo_process(process_id="subtract", mode="operator")
            def __rsub__(self, other) -> ProcessBuilder:
                return subtract(other, self)

            @openeo_process(process_id="multiply", mode="operator")
            def __mul__(self, other) -> ProcessBuilder:
                return self.multiply(other)

            @openeo_process(process_id="multiply", mode="operator")
            def __rmul__(self, other) -> ProcessBuilder:
                return multiply(other, self)

            @openeo_process(process_id="divide", mode="operator")
            def __truediv__(self, other) -> ProcessBuilder:
                return self.divide(other)

            @openeo_process(process_id="divide", mode="operator")
            def __rtruediv__(self, other) -> ProcessBuilder:
                return divide(other, self)

            @openeo_process(process_id="multiply", mode="operator")
            def __neg__(self) -> ProcessBuilder:
                return self.multiply(-1)

            @openeo_process(process_id="power", mode="operator")
            def __pow__(self, other) -> ProcessBuilder:
                return self.power(other)

            @openeo_process(process_id="array_element", mode="operator")
            def __getitem__(self, key) -> ProcessBuilder:
                if isinstance(key, builtins.int):
                    if key > self._ITERATION_LIMIT:
                        raise RuntimeError(
                            "Exceeded ProcessBuilder iteration limit. "
                            "Are you mistakenly using a Python builtin like `sum()` or `all()` in a callback "
                            "instead of the appropriate helpers from the `openeo.processes` module?"
                        )
                    return self.array_element(index=key)
                else:
                    return self.array_element(label=key)

            @openeo_process(process_id="eq", mode="operator")
            def __eq__(self, other) -> ProcessBuilder:
                return eq(self, other)

            @openeo_process(process_id="neq", mode="operator")
            def __ne__(self, other) -> ProcessBuilder:
                return neq(self, other)

            @openeo_process(process_id="lt", mode="operator")
            def __lt__(self, other) -> ProcessBuilder:
                return lt(self, other)

            @openeo_process(process_id="lte", mode="operator")
            def __le__(self, other) -> ProcessBuilder:
                return lte(self, other)

            @openeo_process(process_id="gte", mode="operator")
            def __ge__(self, other) -> ProcessBuilder:
                return gte(self, other)

            @openeo_process(process_id="gt", mode="operator")
            def __gt__(self, other) -> ProcessBuilder:
                return gt(self, other)

        """
    )
    # Template for the functional part: shortcuts first, per-process functions appended below.
    fun_src = textwrap.dedent(
        """
        # Public shortcut
        process = ProcessBuilder.process
        # Private shortcut that has lower chance to collide with a process argument named `process`
        _process = ProcessBuilder.process


        """
    )
    fun_renderer = PythonRenderer(
        body_template="return _process({id!r}, {args})",
        optional_default="UNSET",
        return_type_hint="ProcessBuilder",
        decorator="@openeo_process",
    )
    oo_renderer = PythonRenderer(
        oo_mode=True,
        body_template="return {safe_name}({args})",
        optional_default="UNSET",
        return_type_hint="ProcessBuilder",
        decorator="@openeo_process",
    )
    for p in processes:
        fun_src += fun_renderer.render_process(p) + "\n\n\n"
        oo_src += oo_renderer.render_process(p) + "\n\n"

    output.write(
        textwrap.dedent(
            """
            # Do not edit this file directly.
            # It is automatically generated.
            """
        )
    )
    if argv:
        output.write(
            textwrap.dedent(
                """\
                # Used command line arguments:
                # {cli}
                """.format(
                    cli=" ".join(argv)
                )
            )
        )
    output.write(f"# Generated on {datetime.date.today().isoformat()}\n")

    output.write(oo_src)
    # Normalize trailing whitespace of the generated module to a single newline.
    output.write(fun_src.rstrip() + "\n")
class Schema(typing.NamedTuple):
    """Schema description of an openEO process parameter or return value."""

    # Raw JSON schema: a single schema object (dict) or a list of alternative schemas.
    schema: Union[dict, list]

    @classmethod
    def from_dict(cls, data: dict) -> Schema:
        """Wrap a raw schema structure in a :py:class:`Schema`."""
        return cls(schema=data)

    def is_process_graph(self) -> bool:
        """Is this a {"type": "object", "subtype": "process-graph"} schema?"""
        spec = self.schema
        if not isinstance(spec, dict):
            return False
        return spec.get("type") == "object" and spec.get("subtype") == "process-graph"

    def accepts_geojson(self) -> bool:
        """Does this schema accept inline GeoJSON objects?"""
        spec = self.schema
        if isinstance(spec, dict):
            alternatives = [spec]
        elif isinstance(spec, list):
            alternatives = spec
        else:
            return False

        for candidate in alternatives:
            if not isinstance(candidate, dict):
                continue
            if candidate.get("type") == "object" and candidate.get("subtype") == "geojson":
                return True
        return False
class Process(typing.NamedTuple):
    """
    Container for an openEO process definition,
    covering pre-defined processes, user-defined processes,
    remote process definitions, etc.
    """

    # Common-denominator-wise only the process id is a required field in a process definition.
    # Depending on the context in the openEO API, some other fields (e.g. "process_graph")
    # may also be required.
    id: str
    parameters: Optional[List[Parameter]] = None
    returns: Optional[Returns] = None
    description: Optional[str] = None
    summary: Optional[str] = None
    # TODO: more properties?

    @classmethod
    def from_dict(cls, data: dict) -> Process:
        """
        Construct openEO process from dictionary values.

        Only ``"id"`` is required (a missing id raises ``KeyError``);
        absent optional fields are stored as ``None``.
        """
        return cls(
            id=data["id"],
            parameters=[Parameter.from_dict(d) for d in data["parameters"]] if "parameters" in data else None,
            returns=Returns.from_dict(data["returns"]) if "returns" in data else None,
            description=data.get("description"),
            summary=data.get("summary"),
        )

    @classmethod
    def from_json(cls, data: str) -> Process:
        """Parse openEO process JSON description."""
        return cls.from_dict(json.loads(data))

    @classmethod
    def from_json_url(cls, url: str) -> Process:
        """
        Parse openEO process JSON description from given URL.

        :raises requests.HTTPError: when the server answers with an error status
        """
        resp = requests.get(url)
        # Fail on HTTP errors instead of trying to parse an error page as a process definition.
        resp.raise_for_status()
        return cls.from_dict(resp.json())

    @classmethod
    def from_json_file(cls, path: Union[str, Path]) -> Process:
        """Parse openEO process JSON description file (read as UTF-8)."""
        # Explicit encoding: JSON is UTF-8, while the platform default may differ (e.g. on Windows).
        with Path(path).open("r", encoding="utf-8") as f:
            return cls.from_json(f.read())
p.get("id") == process_id] + if len(processes) != 1: + raise LookupError(f"Process {process_id!r} not found in process listing {namespace!r}") + (data,) = processes + + # Some final validation. + assert "id" in data, "Process definition should at least have an 'id' field" + if process_id is not None and data["id"] != process_id: + raise LookupError(f"Expected process id {process_id!r}, but found {data['id']!r}") + + return Process.from_dict(data) diff --git a/lib/openeo/internal/warnings.py b/lib/openeo/internal/warnings.py new file mode 100644 index 000000000..fe9d5489a --- /dev/null +++ b/lib/openeo/internal/warnings.py @@ -0,0 +1,95 @@ +import functools +import inspect +import warnings +from typing import Callable, Optional + +from deprecated.sphinx import deprecated as _deprecated + + +class UserDeprecationWarning(Warning): + """ + Python has a built-in `DeprecationWarning` class to warn about deprecated features, + but as the docs state (https://docs.python.org/3/library/warnings.html): + + when those warnings are intended for other Python developers + + Consequently, the default warning filters are set up to ignore (hide) these warnings + to the software end user. The developer is expected to explicitly set up + the warning filters to show the deprecation warnings again. + + In case of the openeo Python client however, this does not work because the client user + is usually the developer, but probably won't bother setting up warning filters properly. + + This custom warning class can be used as drop in replacement for `DeprecationWarning`, + where the deprecation warning should be visible by default. 
def legacy_alias(orig: Callable, name: str, *, since: str, mode: str = "full"):
    """
    Create legacy alias of given function/method/classmethod/staticmethod

    :param orig: function/method to create legacy alias for
    :param name: name of the (deprecated) alias
    :param since: version since when this alias is deprecated
    :param mode:
        - "full": raise warnings on calling, only have deprecation note as doc
        - "soft": don't raise warning on calling, just add deprecation note to doc
    :return: wrapper around ``orig`` (re-wrapped as classmethod/staticmethod when ``orig`` was one)
    :raises ValueError: if ``orig`` is not a supported callable kind or ``mode`` is unknown
    """
    # TODO: drop `name` argument?
    post_process = None
    if isinstance(orig, classmethod):
        # Unwrap to the plain function; the descriptor is re-applied at the end.
        post_process = classmethod
        orig = orig.__func__
        kind = "class method"
    elif isinstance(orig, staticmethod):
        post_process = staticmethod
        orig = orig.__func__
        kind = "static method"
    elif inspect.ismethod(orig) or "self" in inspect.signature(orig).parameters:
        kind = "method"
    elif inspect.isfunction(orig):
        kind = "function"
    else:
        raise ValueError(orig)

    # Create a "copy" by wrapping the original
    @functools.wraps(orig)
    def wrapper(*args, **kwargs):
        return orig(*args, **kwargs)

    # Set deprecated name on the wrapper so that deprecation warnings use proper name.
    wrapper.__name__ = name

    ref = f":py:{'meth' if 'method' in kind else 'func'}:`.{orig.__name__}`"
    message = f"Usage of this legacy {kind} is deprecated. Use {ref} instead."

    if mode == "full":
        # Drop original doc block, just show deprecation note.
        wrapper.__doc__ = ""
        wrapper = deprecated(reason=message, version=since)(wrapper)
    elif mode == "soft":
        # Only keep first paragraph of original doc block.
        # The original may have no docstring at all (`__doc__` is None):
        # in that case the doc is just the deprecation note.
        first_paragraph = orig.__doc__.split("\n\n")[:1] if orig.__doc__ is not None else []
        wrapper.__doc__ = "\n\n".join(first_paragraph + [f".. deprecated:: {since}\n {message}\n"])
    else:
        raise ValueError(mode)

    if post_process:
        wrapper = post_process(wrapper)
    return wrapper
def _get_netcdf_zarr_metadata(file_path):
    """
    Build a STAC-Collection-like metadata dict for a local netCDF or Zarr dataset.

    :param file_path: ``pathlib.Path`` of the dataset; Zarr is selected when ".zarr" is among the suffixes
    :return: dict with general fields (id, title, description, license, providers, links),
        the "extent" and the "cube:dimensions" derived from the dataset
    :raises Exception: when no x or y dimension is found among the known candidate names
    """
    posix_path = file_path.as_posix()
    if '.zarr' in file_path.suffixes:
        data = xr.open_dataset(posix_path, chunks={}, engine='zarr')
    else:
        # Add decode_coords='all' if the crs as a band gives some issues
        data = xr.open_dataset(posix_path, chunks={})
    file_path = posix_path

    # The temporal dimension is optional, the spatial ones are mandatory.
    try:
        t_dim = _get_dimension(data.dims, ['t', 'time', 'temporal', 'DATE'])
    except Exception:
        t_dim = None
    try:
        x_dim = _get_dimension(data.dims, ['x', 'X', 'lon', 'longitude'])
        y_dim = _get_dimension(data.dims, ['y', 'Y', 'lat', 'latitude'])
    except Exception as e:
        _log.warning(e)
        raise Exception(f'Error creating metadata for {file_path}') from e

    # Case-insensitive lookup of global attributes: lowercased name -> actual attribute name.
    attr_names = {key.lower(): key for key in data.attrs}

    def attr_or(name, fallback):
        if name in attr_names:
            return data.attrs[attr_names[name]]
        return fallback

    provider = {'name': '',
                'roles': ['producer'],
                'url': ''}
    if 'providers' in attr_names:
        provider['name'] = data.attrs[attr_names['providers']]
    elif 'institution' in attr_names:
        provider['name'] = data.attrs[attr_names['institution']]

    metadata = {
        'stac_version': '1.0.0-rc.2',
        'type': 'Collection',
        'id': file_path,
        'title': attr_or('title', file_path),
        'description': attr_or('description', ''),
        'license': attr_or('license', ''),
        'providers': [provider],
        'links': attr_or('links', ''),
    }

    x_min = data[x_dim].min().item(0)
    x_max = data[x_dim].max().item(0)
    y_min = data[y_dim].min().item(0)
    y_max = data[y_dim].max().item(0)

    # A data variable called 'crs' carries the reference system and is not a band.
    crs_present = 'crs' in data.data_vars
    bands = [var for var in data.data_vars if var != 'crs']
    has_crs_wkt = crs_present and "crs_wkt" in data.crs.attrs

    extent = {}
    if has_crs_wkt:
        transformer = Transformer.from_crs(data.crs.attrs["crs_wkt"], "epsg:4326")
        lat_min, lon_min = transformer.transform(x_min, y_min)
        lat_max, lon_max = transformer.transform(x_max, y_max)
        extent["spatial"] = {"bbox": [[lon_min, lat_min, lon_max, lat_max]]}

    dimensions = {}
    if t_dim is not None:
        t_min = str(data[t_dim].min().values)
        t_max = str(data[t_dim].max().values)
        extent['temporal'] = {'interval': [[t_min, t_max]]}
        dimensions[t_dim] = {'type': 'temporal', 'extent': [t_min, t_max]}
    metadata['extent'] = extent

    dimensions[x_dim] = {'type': 'spatial', 'axis': 'x', 'extent': [x_min, x_max]}
    dimensions[y_dim] = {'type': 'spatial', 'axis': 'y', 'extent': [y_min, y_max]}
    if has_crs_wkt:
        dimensions[x_dim]['reference_system'] = data.crs.attrs['crs_wkt']
        dimensions[y_dim]['reference_system'] = data.crs.attrs['crs_wkt']
    if len(bands) > 0:
        dimensions['bands'] = {'type': 'bands', 'values': bands}
    metadata['cube:dimensions'] = dimensions

    return metadata
'1.0.0-rc.2' + metadata['type'] = 'Collection' + metadata['id'] = file_path + data_attrs_lowercase = [x.lower() for x in data.attrs] + data_attrs_original = [x for x in data.attrs] + data_attrs = dict(zip(data_attrs_lowercase,data_attrs_original)) + if 'title' in data_attrs_lowercase: + metadata['title'] = data.attrs[data_attrs['title']] + else: + metadata['title'] = file_path + if 'description' in data_attrs_lowercase: + metadata['description'] = data.attrs[data_attrs['description']] + else: + metadata['description'] = '' + if 'license' in data_attrs_lowercase: + metadata['license'] = data.attrs[data_attrs['license']] + else: + metadata['license'] = '' + providers = [{'name':'', + 'roles':['producer'], + 'url':''}] + if 'providers' in data_attrs_lowercase: + providers[0]['name'] = data.attrs[data_attrs['providers']] + metadata['providers'] = providers + elif 'institution' in data_attrs_lowercase: + providers[0]['name'] = data.attrs[data_attrs['institution']] + metadata['providers'] = providers + else: + metadata['providers'] = providers + if 'links' in data_attrs_lowercase: + metadata['links'] = data.attrs[data_attrs['links']] + else: + metadata['links'] = '' + x_min = data[x_dim].min().item(0) + x_max = data[x_dim].max().item(0) + y_min = data[y_dim].min().item(0) + y_max = data[y_dim].max().item(0) + + crs_present = False + coords = list(data.coords) + if 'spatial_ref' in coords: + # bands.remove('crs') + crs_present = True + bands = [] + for d in data.data_vars: + data_attrs_lowercase = [x.lower() for x in data[d].attrs] + data_attrs_original = [x for x in data[d].attrs] + data_attrs = dict(zip(data_attrs_lowercase,data_attrs_original)) + if 'description' in data_attrs_lowercase: + bands.append(data[d].attrs[data_attrs['description']]) + else: + bands.append(d) + extent = {} + if crs_present: + if 'crs_wkt' in data.spatial_ref.attrs: + transformer = Transformer.from_crs(data.spatial_ref.attrs['crs_wkt'], 'epsg:4326') + lat_min,lon_min = 
transformer.transform(x_min,y_min) + lat_max,lon_max = transformer.transform(x_max,y_max) + extent['spatial'] = {'bbox': [[lon_min, lat_min, lon_max, lat_max]]} + + if t_dim is not None: + t_min = str(data[t_dim].min().values) + t_max = str(data[t_dim].max().values) + extent['temporal'] = {'interval': [[t_min,t_max]]} + + metadata['extent'] = extent + + t_dimension = {} + if t_dim is not None: + t_dimension = {t_dim: {'type': 'temporal', 'extent':[t_min,t_max]}} + + x_dimension = {x_dim: {'type': 'spatial','axis':'x','extent':[x_min,x_max]}} + y_dimension = {y_dim: {'type': 'spatial','axis':'y','extent':[y_min,y_max]}} + if crs_present: + if 'crs_wkt' in data.spatial_ref.attrs: + x_dimension[x_dim]['reference_system'] = data.spatial_ref.attrs['crs_wkt'] + y_dimension[y_dim]['reference_system'] = data.spatial_ref.attrs['crs_wkt'] + + b_dimension = {} + if len(bands)>0: + b_dimension = {'bands': {'type': 'bands', 'values':bands}} + + metadata['cube:dimensions'] = {**t_dimension,**x_dimension,**y_dimension,**b_dimension} + + return metadata + + +def _get_local_collections(local_collections_path): + if isinstance(local_collections_path,str): + local_collections_path = [local_collections_path] + local_collections_list = [] + for flds in local_collections_path: + local_collections_netcdf_zarr = [p for p in Path(flds).rglob('*') if p.suffix in ['.nc','.zarr']] + for local_file in local_collections_netcdf_zarr: + try: + metadata = _get_netcdf_zarr_metadata(local_file) + local_collections_list.append(metadata) + except Exception as e: + _log.error(e) + continue + local_collections_geotiffs = [p for p in Path(flds).rglob('*') if p.suffix in ['.tif','.tiff']] + for local_file in local_collections_geotiffs: + try: + metadata = _get_geotiff_metadata(local_file) + local_collections_list.append(metadata) + except Exception as e: + _log.error(e) + continue + local_collections_dict = {'collections':local_collections_list} + + return local_collections_dict diff --git 
a/lib/openeo/local/connection.py b/lib/openeo/local/connection.py new file mode 100644 index 000000000..7de3cd452 --- /dev/null +++ b/lib/openeo/local/connection.py @@ -0,0 +1,285 @@ +import datetime +import logging +from pathlib import Path +from typing import Callable, Dict, List, Optional, Union + +import numpy as np +import xarray as xr +from openeo_pg_parser_networkx.graph import OpenEOProcessGraph +from openeo_pg_parser_networkx.pg_schema import BoundingBox, TemporalInterval +from openeo_processes_dask.process_implementations.cubes import load_stac + +from openeo.internal.graph_building import PGNode, as_flat_graph +from openeo.internal.jupyter import VisualDict, VisualList +from openeo.local.collections import ( + _get_geotiff_metadata, + _get_local_collections, + _get_netcdf_zarr_metadata, +) +from openeo.local.processing import PROCESS_REGISTRY +from openeo.metadata import ( + Band, + BandDimension, + CollectionMetadata, + SpatialDimension, + TemporalDimension, +) +from openeo.rest.datacube import DataCube + +_log = logging.getLogger(__name__) + + +class LocalConnection(): + """ + Connection to no backend, for local processing. + """ + + def __init__(self,local_collections_path: Union[str,List]): + """ + Constructor of LocalConnection. + + :param local_collections_path: String or list of strings, path to the folder(s) with + the local collections in netCDF, geoTIFF or ZARR. + """ + self.local_collections_path = local_collections_path + + def list_collections(self) -> List[dict]: + """ + List basic metadata of all collections provided in the local collections folder. + + .. caution:: + :return: list of dictionaries with basic collection metadata. + """ + data = _get_local_collections(self.local_collections_path)["collections"] + return VisualList("collections", data=data) + + def describe_collection(self, collection_id: str) -> dict: + """ + Get full collection metadata for given collection id. + + .. 
seealso:: + + :py:meth:`~openeo.rest.connection.Connection.list_collection_ids` + to list all collection ids provided by the back-end. + + :param collection_id: collection id + :return: collection metadata. + """ + local_collection = Path(collection_id) + if '.nc' in local_collection.suffixes or '.zarr' in local_collection.suffixes: + data = _get_netcdf_zarr_metadata(local_collection) + elif '.tif' in local_collection.suffixes or '.tiff' in local_collection.suffixes: + data = _get_geotiff_metadata(local_collection) + return VisualDict("collection", data=data) + + def collection_metadata(self, name) -> CollectionMetadata: + # TODO: duplication with `Connection.describe_collection`: deprecate one or the other? + return CollectionMetadata(metadata=self.describe_collection(name)) + + def load_collection( + self, + collection_id: str, + spatial_extent: Optional[Dict[str, float]] = None, + temporal_extent: Optional[List[Union[str, datetime.datetime, datetime.date]]] = None, + bands: Optional[List[str]] = None, + properties: Optional[Dict[str, Union[str, PGNode, Callable]]] = None, + fetch_metadata: bool = True, + ) -> DataCube: + """ + Load a DataCube by collection id. + + :param collection_id: image collection identifier + :param spatial_extent: limit data to specified bounding box or polygons + :param temporal_extent: limit data to specified temporal interval + :param bands: only add the specified bands + :param properties: limit data by metadata property predicates + :return: a datacube containing the requested data + """ + return DataCube.load_collection( + collection_id=collection_id, connection=self, + spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=bands, properties=properties, + fetch_metadata=fetch_metadata, + ) + + def datacube_from_process(self, process_id: str, namespace: Optional[str] = None, **kwargs) -> DataCube: + """ + Load a data cube from a (custom) process. + + :param process_id: The process id. 
+ :param namespace: optional: process namespace + :param kwargs: The arguments of the custom process + :return: A :py:class:`DataCube`, without valid metadata, as the client is not aware of this custom process. + """ + graph = PGNode(process_id, namespace=namespace, arguments=kwargs) + return DataCube(graph=graph, connection=self) + + def load_stac( + self, + url: str, + spatial_extent: Optional[Dict[str, float]] = None, + temporal_extent: Optional[List[Union[str, datetime.datetime, datetime.date]]] = None, + bands: Optional[List[str]] = None, + properties: Optional[dict] = None, + ) -> DataCube: + """ + Loads data from a static STAC catalog or a STAC API Collection and returns the data as a processable :py:class:`DataCube`. + A batch job result can be loaded by providing a reference to it. + + If supported by the underlying metadata and file format, the data that is added to the data cube can be + restricted with the parameters ``spatial_extent``, ``temporal_extent`` and ``bands``. + If no data is available for the given extents, a ``NoDataAvailable`` error is thrown. + + Remarks: + + * The bands (and all dimensions that specify nominal dimension labels) are expected to be ordered as + specified in the metadata if the ``bands`` parameter is set to ``null``. + * If no additional parameter is specified this would imply that the whole data set is expected to be loaded. + Due to the large size of many data sets, this is not recommended and may be optimized by back-ends to only + load the data that is actually required after evaluating subsequent processes such as filters. + This means that the values should be processed only after the data has been limited to the required extent + and as a consequence also to a manageable size. + + + :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) + or a specific STAC API Collection that allows to filter items and to download assets. 
+ This includes batch job results, which itself are compliant to STAC. + For external URLs, authentication details such as API keys or tokens may need to be included in the URL. + + Batch job results can be specified in two ways: + + - For Batch job results at the same back-end, a URL pointing to the corresponding batch job results + endpoint should be provided. The URL usually ends with ``/jobs/{id}/results`` and ``{id}`` + is the corresponding batch job ID. + - For external results, a signed URL must be provided. Not all back-ends support signed URLs, + which are provided as a link with the link relation `canonical` in the batch job result metadata. + :param spatial_extent: + Limits the data to load to the specified bounding box or polygons. + + For raster data, the process loads the pixel into the data cube if the point at the pixel center intersects + with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). + + For vector data, the process loads the geometry into the data cube if the geometry is fully within the + bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). + Empty geometries may only be in the data cube if no spatial extent has been provided. + + The GeoJSON can be one of the following feature types: + + * A ``Polygon`` or ``MultiPolygon`` geometry, + * a ``Feature`` with a ``Polygon`` or ``MultiPolygon`` geometry, or + * a ``FeatureCollection`` containing at least one ``Feature`` with ``Polygon`` or ``MultiPolygon`` geometries. + + Set this parameter to ``None`` to set no limit for the spatial extent. + Be careful with this when loading large datasets. It is recommended to use this parameter instead of + using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data. + + :param temporal_extent: + Limits the data to load to the specified left-closed temporal interval. + Applies to all temporal dimensions. 
+ The interval has to be specified as an array with exactly two elements: + + 1. The first element is the start of the temporal interval. + The specified instance in time is **included** in the interval. + 2. The second element is the end of the temporal interval. + The specified instance in time is **excluded** from the interval. + + The second element must always be greater/later than the first element. + Otherwise, a `TemporalExtentEmpty` exception is thrown. + + Also supports open intervals by setting one of the boundaries to ``None``, but never both. + + Set this parameter to ``None`` to set no limit for the temporal extent. + Be careful with this when loading large datasets. It is recommended to use this parameter instead of + using ``filter_temporal()`` directly after loading unbounded data. + + :param bands: + Only adds the specified bands into the data cube so that bands that don't match the list + of band names are not available. Applies to all dimensions of type `bands`. + + Either the unique band name (metadata field ``name`` in bands) or one of the common band names + (metadata field ``common_name`` in bands) can be specified. + If the unique band name and the common name conflict, the unique band name has a higher priority. + + The order of the specified array defines the order of the bands in the data cube. + If multiple bands match a common name, all matched bands are included in the original order. + + It is recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded data. + + :param properties: + Limits the data by metadata properties to include only data in the data cube which + all given conditions return ``True`` for (AND operation). + + Specify key-value-pairs with the key being the name of the metadata property, + which can be retrieved with the openEO Data Discovery for Collections. + The value must be a condition (user-defined process) to be evaluated against a STAC API. 
+ This parameter is not supported for static STAC. + + .. versionadded:: 0.21.0 + """ + arguments = {"url": url} + # TODO: more normalization/validation of extent/band parameters and `properties` + if spatial_extent is not None: + arguments["spatial_extent"] = spatial_extent + if temporal_extent is not None: + arguments["temporal_extent"] = DataCube._get_temporal_extent(extent=temporal_extent) + if bands is not None: + arguments["bands"] = bands + if properties is not None: + arguments["properties"] = properties + cube = self.datacube_from_process(process_id="load_stac", **arguments) + # detect actual metadata from URL + # run load_stac to get the datacube metadata + if spatial_extent is not None: + arguments["spatial_extent"] = BoundingBox.parse_obj(spatial_extent) + if temporal_extent is not None: + arguments["temporal_extent"] = TemporalInterval.parse_obj(temporal_extent) + xarray_cube = load_stac(**arguments) + attrs = xarray_cube.attrs + for at in attrs: + # allowed types: str, Number, ndarray, number, list, tuple + if not isinstance(attrs[at], (int, float, str, np.ndarray, list, tuple)): + attrs[at] = str(attrs[at]) + metadata = CollectionMetadata( + attrs, + dimensions=[ + SpatialDimension(name=xarray_cube.openeo.x_dim, extent=[]), + SpatialDimension(name=xarray_cube.openeo.y_dim, extent=[]), + TemporalDimension(name=xarray_cube.openeo.temporal_dims[0], extent=[]), + BandDimension( + name=xarray_cube.openeo.band_dims[0], + bands=[Band(name=x) for x in xarray_cube[xarray_cube.openeo.band_dims[0]].values], + ), + ], + ) + cube.metadata = metadata + return cube + + def list_udf_runtimes(self) -> dict: + """ + Loads all available UDF runtimes. 
def execute(
    self,
    process_graph: Union[dict, str, Path],
    *,
    validate: Optional[bool] = None,
    auto_decode: bool = True,
) -> xr.DataArray:
    """
    Run a process graph on the local machine and hand back the result as an xarray.DataArray.

    :param process_graph: (flat) dict representing a process graph, or process graph as raw JSON string
        (the value is normalized with ``as_flat_graph`` before execution)
    :param validate: process graph validation is not available locally: any truthy value is rejected
    :param auto_decode: only ``True`` is accepted
    :return: a datacube containing the requested data
    :raises ValueError: when ``validate`` is truthy or ``auto_decode`` is not ``True``
    """
    # Reject the options a local connection can not honour before doing any work.
    if validate:
        raise ValueError("LocalConnection does not support process graph validation")
    if auto_decode is not True:
        raise ValueError("LocalConnection requires auto_decode=True")

    flat_graph = as_flat_graph(process_graph)
    # Turn the parsed graph into a callable backed by the local process registry, then run it.
    run_graph = OpenEOProcessGraph(flat_graph).to_callable(PROCESS_REGISTRY)
    return run_graph()
def load_local_collection(*args, **kwargs):
    """
    Local implementation of the ``load_collection`` process: open a local file with xarray.

    The file to open is given by the ``id`` keyword argument; its format is derived from the file suffixes:

    - ``.zarr``: opened with the ``zarr`` engine and returned without further conversion.
    - ``.nc``: opened with ``decode_coords="all"`` and stacked into an array along a new ``bands`` dimension.
      When a ``crs`` coordinate carries a ``spatial_ref`` or ``crs_wkt`` attribute, that CRS is written
      back on the resulting array.
    - ``.tif``/``.tiff``: opened with one variable per band, each variable renamed to its ``description``
      attribute when it has one, then stacked along a new ``bands`` dimension.

    :param args: positional arguments (ignored)
    :param kwargs: resolved process parameters; only ``id`` is used
    :return: the loaded data
    :raises KeyError: when no ``id`` argument is given
    :raises ValueError: when the file suffix is none of the supported formats
    """
    pretty_args = {k: repr(v)[:80] for k, v in kwargs.items()}
    _log.info("Running process load_collection")
    _log.debug(f"Running process load_collection with resolved parameters: {pretty_args}")

    collection_id = kwargs["id"]
    suffixes = Path(collection_id).suffixes

    if ".zarr" in suffixes:
        return xr.open_dataset(collection_id, chunks={}, engine="zarr")

    if ".nc" in suffixes:
        # decode_coords="all" keeps the CRS variable as a coordinate instead of turning it into a band.
        data = xr.open_dataset(collection_id, chunks={}, decode_coords="all")
        crs = None
        if "crs" in data.coords:
            if "spatial_ref" in data.crs.attrs:
                crs = data.crs.attrs["spatial_ref"]
            elif "crs_wkt" in data.crs.attrs:
                crs = data.crs.attrs["crs_wkt"]
        data = data.to_array(dim="bands")
        if crs is not None:
            data.rio.write_crs(crs, inplace=True)
        return data

    if ".tiff" in suffixes or ".tif" in suffixes:
        data = rioxarray.open_rasterio(collection_id, chunks={}, band_as_variable=True)
        for d in data.data_vars:
            descriptions = [v for k, v in data[d].attrs.items() if k.lower() == "description"]
            if descriptions:
                data = data.rename({d: descriptions[0]})
        return data.to_array(dim="bands")

    # Previously this fell through to `return data` with `data` unbound (UnboundLocalError).
    raise ValueError(
        f"Unsupported file format for local collection {collection_id!r}: "
        "expected a '.zarr', '.nc', '.tif' or '.tiff' file"
    )
class Dimension:
    """Generic data cube dimension, described by a type and a name; parent of the specialised dimensions."""

    # TODO: make these dimension classes immutable data classes

    def __init__(self, type: str, name: str):
        self.type = type
        self.name = name

    def __repr__(self):
        # Render all instance attributes as `key=value` pairs, in attribute definition order.
        fields = ", ".join(f"{key!s}={value!r}" for key, value in self.__dict__.items())
        return f"{self.__class__.__name__}({fields})"

    def __eq__(self, other):
        # Equal only when both the exact class and all attributes match.
        if self.__class__ != other.__class__:
            return False
        return self.__dict__ == other.__dict__

    def rename(self, name) -> Dimension:
        """Return a new dimension of the same type carrying the given name."""
        return Dimension(name=name, type=self.type)

    def rename_labels(self, target, source) -> Dimension:
        """
        Rename labels, as far as the dimension type supports that.

        :param target: List of target labels
        :param source: Source labels, or empty list
        :return: A new dimension with modified labels, or an equal one if nothing changes.
        """
        # No label information is tracked at this level, so the result is just a copy.
        return Dimension(name=self.name, type=self.type)
class Band(NamedTuple):
    """
    Simple container class for band metadata.
    Based on https://github.com/stac-extensions/eo#band-object

    Only ``name`` is required, all other fields default to ``None``.
    """

    # unique band name
    name: str
    # optional "common name" of the band
    common_name: Optional[str] = None
    # wavelength in micrometer
    wavelength_um: Optional[float] = None
    # alternative names for the band (used to still resolve old band names)
    aliases: Optional[List[str]] = None
    # "openeo:gsd" field (https://github.com/Open-EO/openeo-stac-extensions#GSD-Object)
    gsd: Optional[dict] = None
def band_index(self, band: Union[int, str]) -> int:
    """
    Translate a band reference into its position in the band list.

    Strings are matched against the common names first, then the band names and finally the band aliases.

    :param band: band name, common name or index
    :return int: band index
    :raises ValueError: when the index is out of range or the name is unknown
    """
    names = self.band_names
    if isinstance(band, int):
        if 0 <= band < len(names):
            return band
    elif isinstance(band, str):
        # Common names win over regular band names.
        for candidates in (self.common_names, names):
            if band in candidates:
                return candidates.index(band)
        # Check band aliases to still support old band names
        for position, alias_list in enumerate(self.band_aliases):
            if alias_list and band in alias_list:
                return position
    raise ValueError(f"Invalid band name/index {band!r}. Valid names: {names!r}")
def rename_labels(self, target, source) -> Dimension:
    """
    Build a new band dimension with renamed bands.

    Without ``source`` the bands are replaced by fresh bands named after ``target``.
    With ``source`` each listed band is renamed to the ``target`` label at the same position,
    keeping its other metadata.

    :param target: List of target labels
    :param source: Source labels, or empty list
    :return: A new band dimension with the modified bands
    :raises ValueError: when ``source`` is given but differs in length from ``target``
    """
    if not source:
        return BandDimension(name=self.name, bands=[Band(name=label) for label in target])

    if len(target) != len(source):
        raise ValueError(
            f"In rename_labels, `target` and `source` should have same number of labels, "
            f"but got: `target` {target} and `source` {source}"
        )

    renamed = self.bands.copy()
    for current_name, replacement in zip(source, target):
        # Positions are looked up in the untouched original band list.
        position = self.band_index(current_name)
        previous = renamed[position]
        renamed[position] = Band(
            name=replacement,
            common_name=previous.common_name,
            wavelength_um=previous.wavelength_um,
            aliases=previous.aliases,
            gsd=previous.gsd,
        )
    return BandDimension(name=self.name, bands=renamed)
def dimension_names(self) -> List[str]:
    """
    List the names of the cube dimensions, in dimension order.

    :return: list of dimension names; empty when the metadata was created without dimensions
    """
    # `dimensions=None` is a valid constructor argument: report "no dimensions" instead of failing.
    if self._dimensions is None:
        return []
    return [d.name for d in self._dimensions]
def filter_bands(self, band_names: List[Union[int, str]]) -> CubeMetadata:
    """
    Create new `CubeMetadata` with filtered band dimension

    :param band_names: list of band names/indices to keep
    :return: new metadata (of the same class) in which each band dimension only holds the selected bands
    :raises MetadataException: if there is no band dimension
    """
    # Explicit check instead of `assert`, which would be stripped when running with `python -O`.
    if not self.has_band_dimension():
        raise MetadataException("No band dimension")
    return self._clone_and_update(
        dimensions=[d.filter_bands(band_names) if isinstance(d, BandDimension) else d for d in self._dimensions]
    )
def rename_dimension(self, source: str, target: str) -> CubeMetadata:
    """
    Build new metadata in which dimension ``source`` is named ``target``; everything else is kept.

    :param source: current name of the dimension
    :param target: new name for the dimension
    :return: Updated metadata
    """
    self.assert_valid_dimension(source)
    position = self.dimension_names().index(source)
    # Only the dimension at the matching position is replaced by its renamed version.
    renamed = [
        dim.rename(target) if idx == position else dim
        for idx, dim in enumerate(self._dimensions)
    ]
    return self._clone_and_update(dimensions=renamed)
def drop_dimension(self, name: str = None) -> CubeMetadata:
    """
    Build new metadata that no longer contains the dimension with the given name.

    :param name: name of the dimension to drop
    :return: new metadata without that dimension
    :raises ValueError: when there is no dimension with that name
    """
    known_names = self.dimension_names()
    if name in known_names:
        remaining = [d for d in self._dimensions if not d.name == name]
        return self._clone_and_update(dimensions=remaining)
    raise ValueError(f"No dimension named {name!r} (valid names: {known_names!r})")
@classmethod
def _parse_dimensions(cls, spec: dict, complain: Callable[[str], None] = warnings.warn) -> List[Dimension]:
    """
    Build the list of data cube dimensions from a STAC-like collection description.

    Two parts of the spec are combined:
    - 'cube:dimensions' provides dimension names (e.g. 'x', 'y', 't'), extents
      and the band names of band dimensions
    - 'eo:bands' (or 'raster:bands') provides richer per-band details such as "common" name and wavelength

    :param spec: STAC like collection metadata dict
    :param complain: handler for warnings
    :return list: list of `Dimension` objects

    """

    # Dimension info is in `cube:dimensions` (or 0.4-style `properties/cube:dimensions`)
    dimension_specs = deep_get(spec, "cube:dimensions", default=None)
    if not dimension_specs:
        dimension_specs = deep_get(spec, "properties", "cube:dimensions", default=None)
    if not dimension_specs:
        dimension_specs = {}
        complain("No cube:dimensions metadata")

    parsed = []
    for dim_name, dim_info in dimension_specs.items():
        kind = dim_info.get("type")
        if kind == "spatial":
            parsed_dim = SpatialDimension(
                name=dim_name,
                extent=dim_info.get("extent"),
                crs=dim_info.get("reference_system", SpatialDimension.DEFAULT_CRS),
                step=dim_info.get("step", None),
            )
        elif kind == "temporal":
            parsed_dim = TemporalDimension(name=dim_name, extent=dim_info.get("extent"))
        elif kind == "bands":
            plain_bands = [Band(name=label) for label in dim_info.get("values", [])]
            if not plain_bands:
                complain(f"No band names in dimension {dim_name!r}")
            parsed_dim = BandDimension(name=dim_name, bands=plain_bands)
        else:
            complain(f"Unknown dimension type {kind!r}")
            parsed_dim = Dimension(name=dim_name, type=kind)
        parsed.append(parsed_dim)

    # Detailed band information: `summaries/[eo|raster]:bands` (and 0.4 style `properties/eo:bands`)
    band_entries = (
        deep_get(spec, "summaries", "eo:bands", default=None)
        or deep_get(spec, "summaries", "raster:bands", default=None)
        or deep_get(spec, "properties", "eo:bands", default=None)
    )
    if not band_entries:
        return parsed

    # center_wavelength is in micrometer according to spec
    detailed_bands = [
        Band(
            name=entry["name"],
            common_name=entry.get("common_name"),
            wavelength_um=entry.get("center_wavelength"),
            aliases=entry.get("aliases"),
            gsd=entry.get("openeo:gsd"),
        )
        for entry in band_entries
    ]

    # Merge the detailed band info into the (single) band dimension, if that is unambiguous.
    band_dims = [d for d in parsed if d.type == "bands"]
    if len(band_dims) > 1:
        complain("Multiple dimensions of type 'bands'")
    elif band_dims:
        (band_dim,) = band_dims
        names_in_cube = [b.name for b in band_dim.bands]
        names_in_details = [b.name for b in detailed_bands]
        if names_in_cube == names_in_details:
            # Same names in the same order: swap in the richer band objects.
            band_dim.bands = detailed_bands
        else:
            complain(f"Band name mismatch: {names_in_cube} != {names_in_details}")
    elif parsed:
        complain("No 'bands' dimension in 'cube:dimensions' while having 'eo:bands' or 'raster:bands'")
    else:
        # No dimensions at all: fall back to an anonymous band dimension.
        complain("Assuming name 'bands' for anonymous band dimension.")
        parsed.append(BandDimension(name="bands", bands=detailed_bands))

    return parsed
def metadata_from_stac(url: str) -> CubeMetadata:
    """
    Reads the band metadata from a static STAC catalog or a STAC API Collection
    and returns it as a :py:class:`CubeMetadata`

    Band information is looked up depending on the kind of STAC object:

    - Item: ``eo:bands`` in the item properties, otherwise in the summaries of its collection (if any).
    - Collection: ``eo:bands`` in the summaries, extended with bands found in item assets.
      When the "item_assets" extension is available, the bands extracted from it replace that list.
    - Catalog: ``eo:bands`` in the ``summaries`` extra field.

    :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection
    :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url.
    :raises ValueError: if the object read from ``url`` is not a STAC Item, Collection or Catalog
    """

    # TODO move these nested functions and other logic to _StacMetadataParser

    def get_band_metadata(eo_bands_location: dict) -> List[Band]:
        # TODO: return None iso empty list when no metadata?
        return [
            Band(name=band["name"], common_name=band.get("common_name"), wavelength_um=band.get("center_wavelength"))
            for band in eo_bands_location.get("eo:bands", [])
        ]

    def is_band_asset(asset: pystac.Asset) -> bool:
        return "eo:bands" in asset.extra_fields

    stac_object = pystac.read_file(href=url)

    if isinstance(stac_object, pystac.Item):
        item = stac_object
        if "eo:bands" in item.properties:
            eo_bands_location = item.properties
        else:
            # Resolve the parent collection only once (and only when it is needed).
            parent_collection = item.get_collection()
            if parent_collection is not None:
                # TODO: Also do asset based band detection (like below)?
                eo_bands_location = parent_collection.summaries.lists
            else:
                eo_bands_location = {}
        bands = get_band_metadata(eo_bands_location)

    elif isinstance(stac_object, pystac.Collection):
        collection = stac_object
        bands = get_band_metadata(collection.summaries.lists)

        # Summaries is not a required field in a STAC collection, so also check the assets
        # Track the known names in a set so each asset band is checked in constant time.
        known_names = {band.name for band in bands}
        for itm in collection.get_items():
            for asset in itm.get_assets().values():
                if not is_band_asset(asset):
                    continue
                for asset_band in get_band_metadata(asset.extra_fields):
                    if asset_band.name not in known_names:
                        known_names.add(asset_band.name)
                        bands.append(asset_band)
        if _PYSTAC_1_9_EXTENSION_INTERFACE and collection.ext.has("item_assets"):
            # TODO #575 support unordered band names and avoid conversion to a list.
            bands = list(_StacMetadataParser().get_bands_from_item_assets(collection.ext.item_assets))

    elif isinstance(stac_object, pystac.Catalog):
        catalog = stac_object
        bands = get_band_metadata(catalog.extra_fields.get("summaries", {}))
    else:
        raise ValueError(stac_object)

    # TODO: conditionally include band dimension when there was actual indication of band metadata?
    band_dimension = BandDimension(name="bands", bands=bands)
    dimensions = [band_dimension]

    # TODO: is it possible to derive the actual name of temporal dimension that the backend will use?
    temporal_dimension = _StacMetadataParser().get_temporal_dimension(stac_object)
    if temporal_dimension:
        dimensions.append(temporal_dimension)

    return CubeMetadata(dimensions=dimensions)
def get_bands_from_item_assets(
    self, item_assets: Dict[str, pystac.extensions.item_assets.AssetDefinition]
) -> Set[Band]:
    """
    Collect the bands declared in the "item_assets" objects of a STAC Collection
    ("item-assets" extension, in combination with the "eo" extension).

    "item_assets" is a mapping in STAC, so there is no defined band order:
    the bands are therefore returned as a set.

    :param item_assets: a STAC `item_assets` mapping
    """
    # Wrapping the logger call in a cache makes a repeated, identical warning fire just once per call of this method.
    warn_once = functools.lru_cache()(_log.warning)
    collected = set()
    for asset_definition in item_assets.values():
        found = self._get_bands_from_item_asset(asset_definition, _warn=warn_once)
        # Asset definitions without band info yield None (or an empty list): nothing to add then.
        collected.update(found or ())
    return collected
+ if _PYSTAC_1_9_EXTENSION_INTERFACE: + if stac_obj.ext.has("cube") and hasattr(stac_obj.ext, "cube"): + temporal_dims = [ + (n, d.extent or [None, None]) + for (n, d) in stac_obj.ext.cube.dimensions.items() + if d.dim_type == pystac.extensions.datacube.DimensionType.TEMPORAL + ] + if len(temporal_dims) == 1: + name, extent = temporal_dims[0] + return TemporalDimension(name=name, extent=extent) + elif isinstance(stac_obj, pystac.Collection) and stac_obj.extent.temporal: + # No explicit "cube:dimensions": build fallback from "extent.temporal", + # with dimension name "t" (openEO API recommendation). + extent = [Rfc3339(propagate_none=True).normalize(d) for d in stac_obj.extent.temporal.intervals[0]] + return TemporalDimension(name="t", extent=extent) + else: + if isinstance(stac_obj, pystac.Item): + cube_dimensions = stac_obj.properties.get("cube:dimensions", {}) + elif isinstance(stac_obj, pystac.Collection): + cube_dimensions = stac_obj.extra_fields.get("cube:dimensions", {}) + else: + cube_dimensions = {} + temporal_dims = [ + (n, d.get("extent", [None, None])) for (n, d) in cube_dimensions.items() if d.get("type") == "temporal" + ] + if len(temporal_dims) == 1: + name, extent = temporal_dims[0] + return TemporalDimension(name=name, extent=extent) diff --git a/lib/openeo/processes.py b/lib/openeo/processes.py new file mode 100644 index 000000000..fcc13312f --- /dev/null +++ b/lib/openeo/processes.py @@ -0,0 +1,5590 @@ + +# Do not edit this file directly. +# It is automatically generated. 
@openeo_process(process_id="multiply", mode="operator")
def __neg__(self) -> ProcessBuilder:
    # Unary minus is expressed as a multiplication by -1, hence the `multiply` process id above.
    return self.multiply(-1)
__getitem__(self, key) -> ProcessBuilder: + if isinstance(key, builtins.int): + if key > self._ITERATION_LIMIT: + raise RuntimeError( + "Exceeded ProcessBuilder iteration limit. " + "Are you mistakenly using a Python builtin like `sum()` or `all()` in a callback " + "instead of the appropriate helpers from the `openeo.processes` module?" + ) + return self.array_element(index=key) + else: + return self.array_element(label=key) + + @openeo_process(process_id="eq", mode="operator") + def __eq__(self, other) -> ProcessBuilder: + return eq(self, other) + + @openeo_process(process_id="neq", mode="operator") + def __ne__(self, other) -> ProcessBuilder: + return neq(self, other) + + @openeo_process(process_id="lt", mode="operator") + def __lt__(self, other) -> ProcessBuilder: + return lt(self, other) + + @openeo_process(process_id="lte", mode="operator") + def __le__(self, other) -> ProcessBuilder: + return lte(self, other) + + @openeo_process(process_id="ge", mode="operator") + def __ge__(self, other) -> ProcessBuilder: + return gte(self, other) + + @openeo_process(process_id="gt", mode="operator") + def __gt__(self, other) -> ProcessBuilder: + return gt(self, other) + + @openeo_process + def absolute(self) -> ProcessBuilder: + """ + Absolute value + + :param self: A number. + + :return: The computed absolute value. + """ + return absolute(x=self) + + @openeo_process + def add(self, y) -> ProcessBuilder: + """ + Addition of two numbers + + :param self: The first summand. + :param y: The second summand. + + :return: The computed sum of the two numbers. + """ + return add(x=self, y=y) + + @openeo_process + def add_dimension(self, name, label, type=UNSET) -> ProcessBuilder: + """ + Add a new dimension + + :param self: A data cube to add the dimension to. + :param name: Name for the dimension. + :param label: A dimension label. + :param type: The type of dimension, defaults to `other`. + + :return: The data cube with a newly added dimension. 
The new dimension has exactly one dimension label. + All other dimensions remain unchanged. + """ + return add_dimension(data=self, name=name, label=label, type=type) + + @openeo_process + def aggregate_spatial(self, geometries, reducer, target_dimension=UNSET, context=UNSET) -> ProcessBuilder: + """ + Zonal statistics for geometries + + :param self: A raster data cube with at least two spatial dimensions. The data cube implicitly gets + restricted to the bounds of the geometries as if ``filter_spatial()`` would have been used with the + same values for the corresponding parameters immediately before this process. + :param geometries: Geometries for which the aggregation will be computed. Feature properties are + preserved for vector data cubes and all GeoJSON Features. One value will be computed per label in the + dimension of type `geometries`, GeoJSON `Feature` or `Geometry`. For a `FeatureCollection` multiple + values will be computed, one value per contained `Feature`. No values will be computed for empty + geometries. For example, a single value will be computed for a `MultiPolygon`, but two values will be + computed for a `FeatureCollection` containing two polygons. - For **polygons**, the process considers + all pixels for which the point at the pixel center intersects with the corresponding polygon (as + defined in the Simple Features standard by the OGC). - For **points**, the process considers the + closest pixel center. - For **lines** (line strings), the process considers all the pixels whose + centers are closest to at least one point on the line. Thus, pixels may be part of multiple geometries + and be part of multiple aggregations. No operation is applied to geometries that are outside of the + bounds of the data. + :param reducer: A reducer to be applied on all values of each geometry. 
A reducer is a single process + such as ``mean()`` or a set of processes, which computes a single value for a list of values, see the + category 'reducer' for such processes. + :param target_dimension: By default (which is `null`), the process only computes the results and + doesn't add a new dimension. If this parameter contains a new dimension name, the computation also + stores information about the total count of pixels (valid + invalid pixels) and the number of valid + pixels (see ``is_valid()``) for each computed value. These values are added as a new dimension. The new + dimension of type `other` has the dimension labels `value`, `total_count` and `valid_count`. Fails + with a `TargetDimensionExists` exception if a dimension with the specified name exists. + :param context: Additional data to be passed to the reducer. + + :return: A vector data cube with the computed results. Empty geometries still exist but without any + aggregated values (i.e. no-data). The spatial dimensions are replaced by a dimension of type + 'geometries' and if `target_dimension` is not `null`, a new dimension is added. + """ + return aggregate_spatial( + data=self, + geometries=geometries, + reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), + target_dimension=target_dimension, + context=context + ) + + @openeo_process + def aggregate_spatial_window(self, reducer, size, boundary=UNSET, align=UNSET, context=UNSET) -> ProcessBuilder: + """ + Zonal statistics for rectangular windows + + :param self: A raster data cube with exactly two horizontal spatial dimensions and an arbitrary number + of additional dimensions. The process is applied to all additional dimensions individually. + :param reducer: A reducer to be applied on the list of values, which contain all pixels covered by the + window. 
A reducer is a single process such as ``mean()`` or a set of processes, which computes a single + value for a list of values, see the category 'reducer' for such processes. + :param size: Window size in pixels along the horizontal spatial dimensions. The first value + corresponds to the `x` axis, the second value corresponds to the `y` axis. + :param boundary: Behavior to apply if the number of values for the axes `x` and `y` is not a multiple + of the corresponding value in the `size` parameter. Options are: - `pad` (default): pad the data cube + with the no-data value `null` to fit the required window size. - `trim`: trim the data cube to fit the + required window size. Set the parameter `align` to specify to which corner the data is aligned. + :param align: If the data requires padding or trimming (see parameter `boundary`), specifies to which + corner of the spatial extent the data is aligned to. For example, if the data is aligned to the upper + left, the process pads/trims at the lower-right. + :param context: Additional data to be passed to the reducer. + + :return: A raster data cube with the newly computed values and the same dimensions. The resolution + will change depending on the chosen values for the `size` and `boundary` parameter. It usually + decreases for the dimensions which have the corresponding parameter `size` set to values greater than + 1. The dimension labels will be set to the coordinate at the center of the window. The other dimension + properties (name, type and reference system) remain unchanged. + """ + return aggregate_spatial_window( + data=self, + reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), + size=size, + boundary=boundary, + align=align, + context=context + ) + + @openeo_process + def aggregate_temporal(self, intervals, reducer, labels=UNSET, dimension=UNSET, context=UNSET) -> ProcessBuilder: + """ + Temporal aggregations + + :param self: A data cube. 
+ :param intervals: Left-closed temporal intervals, which are allowed to overlap. Each temporal interval + in the array has exactly two elements: 1. The first element is the start of the temporal interval. The + specified time instant is **included** in the interval. 2. The second element is the end of the + temporal interval. The specified time instant is **excluded** from the interval. The second element + must always be greater/later than the first element, except when using time without date. Otherwise, a + `TemporalExtentEmpty` exception is thrown. + :param reducer: A reducer to be applied for the values contained in each interval. A reducer is a + single process such as ``mean()`` or a set of processes, which computes a single value for a list of + values, see the category 'reducer' for such processes. Intervals may not contain any values, which for + most reducers leads to no-data (`null`) values by default. + :param labels: Distinct labels for the intervals, which can contain dates and/or times. Is only + required to be specified if the values for the start of the temporal intervals are not distinct and + thus the default labels would not be unique. The number of labels and the number of groups need to be + equal. + :param dimension: The name of the temporal dimension for aggregation. All data along the dimension is + passed through the specified reducer. If the dimension is not set or set to `null`, the data cube is + expected to only have one temporal dimension. Fails with a `TooManyDimensions` exception if it has more + dimensions. Fails with a `DimensionNotAvailable` exception if the specified dimension does not exist. + :param context: Additional data to be passed to the reducer. + + :return: A new data cube with the same dimensions. The dimension properties (name, type, labels, + reference system and resolution) remain unchanged, except for the resolution and dimension labels of + the given temporal dimension. 
+ """ + return aggregate_temporal( + data=self, + intervals=intervals, + reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), + labels=labels, + dimension=dimension, + context=context + ) + + @openeo_process + def aggregate_temporal_period(self, period, reducer, dimension=UNSET, context=UNSET) -> ProcessBuilder: + """ + Temporal aggregations based on calendar hierarchies + + :param self: The source data cube. + :param period: The time intervals to aggregate. The following pre-defined values are available: * + `hour`: Hour of the day * `day`: Day of the year * `week`: Week of the year * `dekad`: Ten day periods, + counted per year with three periods per month (day 1 - 10, 11 - 20 and 21 - end of month). The third + dekad of the month can range from 8 to 11 days. For example, the third dekad of a year spans from + January 21 till January 31 (11 days), the fourth dekad spans from February 1 till February 10 (10 days) + and the sixth dekad spans from February 21 till February 28 or February 29 in a leap year (8 or 9 days + respectively). * `month`: Month of the year * `season`: Three month periods of the calendar seasons + (December - February, March - May, June - August, September - November). * `tropical-season`: Six month + periods of the tropical seasons (November - April, May - October). * `year`: Proleptic years * + `decade`: Ten year periods ([0-to-9 decade](https://en.wikipedia.org/wiki/Decade#0-to-9_decade)), from + a year ending in a 0 to the next year ending in a 9. * `decade-ad`: Ten year periods ([1-to-0 + decade](https://en.wikipedia.org/wiki/Decade#1-to-0_decade)) better aligned with the anno Domini (AD) + calendar era, from a year ending in a 1 to the next year ending in a 0. + :param reducer: A reducer to be applied for the values contained in each period. A reducer is a single + process such as ``mean()`` or a set of processes, which computes a single value for a list of values, + see the category 'reducer' for such processes. 
Periods may not contain any values, which for most + reducers leads to no-data (`null`) values by default. + :param dimension: The name of the temporal dimension for aggregation. All data along the dimension is + passed through the specified reducer. If the dimension is not set or set to `null`, the source data + cube is expected to only have one temporal dimension. Fails with a `TooManyDimensions` exception if it + has more dimensions. Fails with a `DimensionNotAvailable` exception if the specified dimension does not + exist. + :param context: Additional data to be passed to the reducer. + + :return: A new data cube with the same dimensions. The dimension properties (name, type, labels, + reference system and resolution) remain unchanged, except for the resolution and dimension labels of + the given temporal dimension. The specified temporal dimension has the following dimension labels + (`YYYY` = four-digit year, `MM` = two-digit month, `DD` two-digit day of month): * `hour`: `YYYY-MM- + DD-00` - `YYYY-MM-DD-23` * `day`: `YYYY-001` - `YYYY-365` * `week`: `YYYY-01` - `YYYY-52` * `dekad`: + `YYYY-00` - `YYYY-36` * `month`: `YYYY-01` - `YYYY-12` * `season`: `YYYY-djf` (December - February), + `YYYY-mam` (March - May), `YYYY-jja` (June - August), `YYYY-son` (September - November). * `tropical- + season`: `YYYY-ndjfma` (November - April), `YYYY-mjjaso` (May - October). * `year`: `YYYY` * `decade`: + `YYY0` * `decade-ad`: `YYY1` The dimension labels in the new data cube are complete for the whole + extent of the source data cube. For example, if `period` is set to `day` and the source data cube has + two dimension labels at the beginning of the year (`2020-01-01`) and the end of a year (`2020-12-31`), + the process returns a data cube with 365 dimension labels (`2020-001`, `2020-002`, ..., `2020-365`). 
In + contrast, if `period` is set to `day` and the source data cube has just one dimension label + `2020-01-05`, the process returns a data cube with just a single dimension label (`2020-005`). + """ + return aggregate_temporal_period( + data=self, + period=period, + reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), + dimension=dimension, + context=context + ) + + @openeo_process + def all(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Are all of the values true? + + :param self: A set of boolean values. + :param ignore_nodata: Indicates whether no-data values are ignored or not and ignores them by default. + + :return: Boolean result of the logical operation. + """ + return all(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def and_(self, y) -> ProcessBuilder: + """ + Logical AND + + :param self: A boolean value. + :param y: A boolean value. + + :return: Boolean result of the logical AND. + """ + return and_(x=self, y=y) + + @openeo_process + def anomaly(self, normals, period) -> ProcessBuilder: + """ + Compute anomalies + + :param self: A data cube with exactly one temporal dimension and the following dimension labels for the + given period (`YYYY` = four-digit year, `MM` = two-digit month, `DD` two-digit day of month): * + `hour`: `YYYY-MM-DD-00` - `YYYY-MM-DD-23` * `day`: `YYYY-001` - `YYYY-365` * `week`: `YYYY-01` - + `YYYY-52` * `dekad`: `YYYY-00` - `YYYY-36` * `month`: `YYYY-01` - `YYYY-12` * `season`: `YYYY-djf` + (December - February), `YYYY-mam` (March - May), `YYYY-jja` (June - August), `YYYY-son` (September - + November). * `tropical-season`: `YYYY-ndjfma` (November - April), `YYYY-mjjaso` (May - October). * + `year`: `YYYY` * `decade`: `YYY0` * `decade-ad`: `YYY1` * `single-period` / `climatology-period`: Any + ``aggregate_temporal_period()`` can compute such a data cube. + :param normals: A data cube with normals, e.g. 
daily, monthly or yearly values computed from a process + such as ``climatological_normal()``. Must contain exactly one temporal dimension with the following + dimension labels for the given period: * `hour`: `00` - `23` * `day`: `001` - `365` * `week`: `01` - + `52` * `dekad`: `00` - `36` * `month`: `01` - `12` * `season`: `djf` (December - February), `mam` + (March - May), `jja` (June - August), `son` (September - November) * `tropical-season`: `ndjfma` + (November - April), `mjjaso` (May - October) * `year`: Four-digit year numbers * `decade`: Four-digit + year numbers, the last digit being a `0` * `decade-ad`: Four-digit year numbers, the last digit being a + `1` * `single-period` / `climatology-period`: A single dimension label with any name is expected. + :param period: Specifies the time intervals available in the normals data cube. The following options + are available: * `hour`: Hour of the day * `day`: Day of the year * `week`: Week of the year * + `dekad`: Ten day periods, counted per year with three periods per month (day 1 - 10, 11 - 20 and 21 - + end of month). The third dekad of the month can range from 8 to 11 days. For example, the fourth dekad + is Feb, 1 - Feb, 10 each year. * `month`: Month of the year * `season`: Three month periods of the + calendar seasons (December - February, March - May, June - August, September - November). * `tropical- + season`: Six month periods of the tropical seasons (November - April, May - October). * `year`: + Proleptic years * `decade`: Ten year periods ([0-to-9 + decade](https://en.wikipedia.org/wiki/Decade#0-to-9_decade)), from a year ending in a 0 to the next + year ending in a 9. * `decade-ad`: Ten year periods ([1-to-0 + decade](https://en.wikipedia.org/wiki/Decade#1-to-0_decade)) better aligned with the anno Domini (AD) + calendar era, from a year ending in a 1 to the next year ending in a 0. 
* `single-period` / + `climatology-period`: A single period of arbitrary length + + :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference + system and resolution) remain unchanged. + """ + return anomaly(data=self, normals=normals, period=period) + + @openeo_process + def any(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Is at least one value true? + + :param self: A set of boolean values. + :param ignore_nodata: Indicates whether no-data values are ignored or not and ignores them by default. + + :return: Boolean result of the logical operation. + """ + return any(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def apply(self, process, context=UNSET) -> ProcessBuilder: + """ + Apply a process to each value + + :param self: A data cube. + :param process: A process that accepts and returns a single value and is applied on each individual + value in the data cube. The process may consist of multiple sub-processes and could, for example, + consist of processes such as ``absolute()`` or ``linear_scale_range()``. + :param context: Additional data to be passed to the process. + + :return: A data cube with the newly computed values and the same dimensions. The dimension properties + (name, type, labels, reference system and resolution) remain unchanged. + """ + return apply(data=self, process=build_child_callback(process, parent_parameters=['x', 'context']), context=context) + + @openeo_process + def apply_dimension(self, process, dimension, target_dimension=UNSET, context=UNSET) -> ProcessBuilder: + """ + Apply a process to all values along a dimension + + :param self: A data cube. + :param process: Process to be applied on all values along the given dimension. The specified process + needs to accept an array and must return an array with at least one element. A process may consist of + multiple sub-processes. + :param dimension: The name of the source dimension to apply the process on. 
Fails with a + `DimensionNotAvailable` exception if the specified dimension does not exist. + :param target_dimension: The name of the target dimension or `null` (the default) to use the source + dimension specified in the parameter `dimension`. By specifying a target dimension, the source + dimension is removed. The target dimension with the specified name and the type `other` (see + ``add_dimension()``) is created, if it doesn't exist yet. + :param context: Additional data to be passed to the process. + + :return: A data cube with the newly computed values. All dimensions stay the same, except for the + dimensions specified in corresponding parameters. There are three cases how the dimensions can change: + 1. The source dimension is the target dimension: - The (number of) dimensions remain unchanged as + the source dimension is the target dimension. - The source dimension properties name and type remain + unchanged. - The dimension labels, the reference system and the resolution are preserved only if the + number of values in the source dimension is equal to the number of values computed by the process. + Otherwise, all other dimension properties change as defined in the list below. 2. The source dimension + is not the target dimension. The target dimension exists with a single label only: - The number of + dimensions decreases by one as the source dimension is 'dropped' and the target dimension is filled + with the processed data that originates from the source dimension. - The target dimension properties + name and type remain unchanged. All other dimension properties change as defined in the list below. 3. + The source dimension is not the target dimension and the latter does not exist: - The number of + dimensions remain unchanged, but the source dimension is replaced with the target dimension. - The + target dimension has the specified name and the type other. All other dimension properties are set as + defined in the list below. 
Unless otherwise stated above, for the given (target) dimension the + following applies: - the number of dimension labels is equal to the number of values computed by the + process, - the dimension labels are incrementing integers starting from zero, - the resolution changes, + and - the reference system is undefined. + """ + return apply_dimension( + data=self, + process=build_child_callback(process, parent_parameters=['data', 'context']), + dimension=dimension, + target_dimension=target_dimension, + context=context + ) + + @openeo_process + def apply_kernel(self, kernel, factor=UNSET, border=UNSET, replace_invalid=UNSET) -> ProcessBuilder: + """ + Apply a spatial convolution with a kernel + + :param self: A raster data cube. + :param kernel: Kernel as a two-dimensional array of weights. The inner level of the nested array aligns + with the `x` axis and the outer level aligns with the `y` axis. Each level of the kernel must have an + uneven number of elements, otherwise the process throws a `KernelDimensionsUneven` exception. + :param factor: A factor that is multiplied to each value after the kernel has been applied. This is + basically a shortcut for explicitly multiplying each value by a factor afterwards, which is often + required for some kernel-based algorithms such as the Gaussian blur. + :param border: Determines how the data is extended when the kernel overlaps with the borders. Defaults + to fill the border with zeroes. 
The following options are available: * *numeric value* - fill with a + user-defined constant number `n`: `nnnnnn|abcdefgh|nnnnnn` (default, with `n` = 0) * `replicate` - + repeat the value from the pixel at the border: `aaaaaa|abcdefgh|hhhhhh` * `reflect` - mirror/reflect + from the border: `fedcba|abcdefgh|hgfedc` * `reflect_pixel` - mirror/reflect from the center of the + pixel at the border: `gfedcb|abcdefgh|gfedcb` * `wrap` - repeat/wrap the image: + `cdefgh|abcdefgh|abcdef` + :param replace_invalid: This parameter specifies the value to replace non-numerical or infinite + numerical values with. By default, those values are replaced with zeroes. + + :return: A data cube with the newly computed values and the same dimensions. The dimension properties + (name, type, labels, reference system and resolution) remain unchanged. + """ + return apply_kernel(data=self, kernel=kernel, factor=factor, border=border, replace_invalid=replace_invalid) + + @openeo_process + def apply_neighborhood(self, process, size, overlap=UNSET, context=UNSET) -> ProcessBuilder: + """ + Apply a process to pixels in a n-dimensional neighborhood + + :param self: A raster data cube. + :param process: Process to be applied on all neighborhoods. + :param size: Neighborhood sizes along each dimension. This object maps dimension names to either a + physical measure (e.g. 100 m, 10 days) or pixels (e.g. 32 pixels). For dimensions not specified, the + default is to provide all values. Be aware that including all values from overly large dimensions may + not be processed at once. + :param overlap: Overlap of neighborhoods along each dimension to avoid border effects. By default no + overlap is provided. For instance a temporal dimension can add 1 month before and after a + neighborhood. In the spatial dimensions, this is often a number of pixels. The overlap specified is + added before and after, so an overlap of 8 pixels will add 8 pixels on both sides of the window, so 16 + in total. 
Be aware that large overlaps increase the need for computational resources and modifying + overlapping data in subsequent operations has no effect. + :param context: Additional data to be passed to the process. + + :return: A raster data cube with the newly computed values and the same dimensions. The dimension + properties (name, type, labels, reference system and resolution) remain unchanged. + """ + return apply_neighborhood( + data=self, + process=build_child_callback(process, parent_parameters=['data', 'context']), + size=size, + overlap=overlap, + context=context + ) + + @openeo_process + def apply_polygon(self, polygons, process, mask_value=UNSET, context=UNSET) -> ProcessBuilder: + """ + Apply a process to segments of the data cube + + :param self: A data cube. + :param polygons: A vector data cube containing at least one polygon. The provided vector data can be + one of the following: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a `Polygon` or + `MultiPolygon` geometry, or * a `FeatureCollection` containing at least one `Feature` with `Polygon` or + `MultiPolygon` geometries. * Empty geometries are ignored. + :param process: A process that accepts and returns a single data cube and is applied on each individual + sub data cube. The process may consist of multiple sub-processes. + :param mask_value: All pixels for which the point at the pixel center **does not** intersect with the + polygon are replaced with the given value, which defaults to `null` (no data). It can provide a + distinction between no data values within the polygon and masked pixels outside of it. + :param context: Additional data to be passed to the process. + + :return: A data cube with the newly computed values and the same dimensions. The dimension properties + (name, type, labels, reference system and resolution) remain unchanged. 
+ """ + return apply_polygon( + data=self, + polygons=polygons, + process=build_child_callback(process, parent_parameters=['data', 'context']), + mask_value=mask_value, + context=context + ) + + @openeo_process + def arccos(self) -> ProcessBuilder: + """ + Inverse cosine + + :param self: A number. + + :return: The computed angle in radians. + """ + return arccos(x=self) + + @openeo_process + def arcosh(self) -> ProcessBuilder: + """ + Inverse hyperbolic cosine + + :param self: A number. + + :return: The computed angle in radians. + """ + return arcosh(x=self) + + @openeo_process + def arcsin(self) -> ProcessBuilder: + """ + Inverse sine + + :param self: A number. + + :return: The computed angle in radians. + """ + return arcsin(x=self) + + @openeo_process + def arctan(self) -> ProcessBuilder: + """ + Inverse tangent + + :param self: A number. + + :return: The computed angle in radians. + """ + return arctan(x=self) + + @openeo_process + def arctan2(self, x) -> ProcessBuilder: + """ + Inverse tangent of two numbers + + :param self: A number to be used as the dividend. + :param x: A number to be used as the divisor. + + :return: The computed angle in radians. + """ + return arctan2(y=self, x=x) + + @openeo_process + def ard_normalized_radar_backscatter(self, elevation_model=UNSET, contributing_area=UNSET, ellipsoid_incidence_angle=UNSET, noise_removal=UNSET, options=UNSET) -> ProcessBuilder: + """ + CARD4L compliant SAR NRB generation + + :param self: The source data cube containing SAR input. + :param elevation_model: The digital elevation model to use. Set to `null` (the default) to allow the + back-end to choose, which will improve portability, but reduce reproducibility. + :param contributing_area: If set to `true`, a DEM-based local contributing area band named + `contributing_area` is added. The values are given in square meters. 
+ :param ellipsoid_incidence_angle: If set to `true`, an ellipsoidal incidence angle band named + `ellipsoid_incidence_angle` is added. The values are given in degrees. + :param noise_removal: If set to `false`, no noise removal is applied. Defaults to `true`, which removes + noise. + :param options: Proprietary options for the backscatter computations. Specifying proprietary options + will reduce portability. + + :return: Backscatter values expressed as gamma0 in linear scale. In addition to the bands + `contributing_area` and `ellipsoid_incidence_angle` that can optionally be added with corresponding + parameters, the following bands are always added to the data cube: - `mask`: A data mask that + indicates which values are valid (1), invalid (0) or contain no-data (null). - `local_incidence_angle`: + A band with DEM-based local incidence angles in degrees. The data returned is CARD4L compliant with + corresponding metadata. + """ + return ard_normalized_radar_backscatter( + data=self, + elevation_model=elevation_model, + contributing_area=contributing_area, + ellipsoid_incidence_angle=ellipsoid_incidence_angle, + noise_removal=noise_removal, + options=options + ) + + @openeo_process + def ard_surface_reflectance(self, atmospheric_correction_method, cloud_detection_method, elevation_model=UNSET, atmospheric_correction_options=UNSET, cloud_detection_options=UNSET) -> ProcessBuilder: + """ + CARD4L compliant Surface Reflectance generation + + :param self: The source data cube containing multi-spectral optical top of the atmosphere (TOA) + reflectances. There must be a single dimension of type `bands` available. + :param atmospheric_correction_method: The atmospheric correction method to use. + :param cloud_detection_method: The cloud detection method to use. Each method supports detecting + different atmospheric disturbances such as clouds, cloud shadows, aerosols, haze, ozone and/or water + vapour in optical imagery. 
+ :param elevation_model: The digital elevation model to use. Set to `null` (the default) to allow the + back-end to choose, which will improve portability, but reduce reproducibility. + :param atmospheric_correction_options: Proprietary options for the atmospheric correction method. + Specifying proprietary options will reduce portability. + :param cloud_detection_options: Proprietary options for the cloud detection method. Specifying + proprietary options will reduce portability. + + :return: Data cube containing bottom of atmosphere reflectances for each spectral band in the source + data cube, with atmospheric disturbances like clouds and cloud shadows removed. No-data values (null) + are directly set in the bands. Depending on the methods used, several additional bands will be added to + the data cube: Data cube containing bottom of atmosphere reflectances for each spectral band in the + source data cube, with atmospheric disturbances like clouds and cloud shadows removed. Depending on the + methods used, several additional bands will be added to the data cube: - `date` (optional): Specifies + per-pixel acquisition timestamps. - `incomplete-testing` (required): Identifies pixels with a value of + 1 for which the per-pixel tests (at least saturation, cloud and cloud shadows, see CARD4L specification + for details) have not all been successfully completed. Otherwise, the value is 0. - `saturation` + (required) / `saturation_{band}` (optional): Indicates where pixels in the input spectral bands are + saturated (1) or not (0). If the saturation is given per band, the band names are `saturation_{band}` + with `{band}` being the band name from the source data cube. - `cloud`, `shadow` (both + required),`aerosol`, `haze`, `ozone`, `water_vapor` (all optional): Indicates the probability of pixels + being an atmospheric disturbance such as clouds. All bands have values between 0 (clear) and 1, which + describes the probability that it is an atmospheric disturbance. 
- `snow-ice` (optional): Points to a + file that indicates whether a pixel is assessed as being snow/ice (1) or not (0). All values describe + the probability and must be between 0 and 1. - `land-water` (optional): Indicates whether a pixel is + assessed as being land (1) or water (0). All values describe the probability and must be between 0 and + 1. - `incidence-angle` (optional): Specifies per-pixel incidence angles in degrees. - `azimuth` + (optional): Specifies per-pixel azimuth angles in degrees. - `sun-azimuth:` (optional): Specifies per- + pixel sun azimuth angles in degrees. - `sun-elevation` (optional): Specifies per-pixel sun elevation + angles in degrees. - `terrain-shadow` (optional): Indicates with a value of 1 whether a pixel is not + directly illuminated due to terrain shadowing. Otherwise, the value is 0. - `terrain-occlusion` + (optional): Indicates with a value of 1 whether a pixel is not visible to the sensor due to terrain + occlusion during off-nadir viewing. Otherwise, the value is 0. - `terrain-illumination` (optional): + Contains coefficients used for terrain illumination correction are provided for each pixel. The data + returned is CARD4L compliant with corresponding metadata. + """ + return ard_surface_reflectance( + data=self, + atmospheric_correction_method=atmospheric_correction_method, + cloud_detection_method=cloud_detection_method, + elevation_model=elevation_model, + atmospheric_correction_options=atmospheric_correction_options, + cloud_detection_options=cloud_detection_options + ) + + @openeo_process + def array_append(self, value, label=UNSET) -> ProcessBuilder: + """ + Append a value to an array + + :param self: An array. + :param value: Value to append to the array. + :param label: If the given array is a labeled array, a new label for the new value should be given. If + not given or `null`, the array index as string is used as the label. If in any case the label exists, a + `LabelExists` exception is thrown. 
+ + :return: The new array with the value being appended. + """ + return array_append(data=self, value=value, label=label) + + @openeo_process + def array_apply(self, process, context=UNSET) -> ProcessBuilder: + """ + Apply a process to each array element + + :param self: An array. + :param process: A process that accepts and returns a single value and is applied on each individual + value in the array. The process may consist of multiple sub-processes and could, for example, consist + of processes such as ``absolute()`` or ``linear_scale_range()``. + :param context: Additional data to be passed to the process. + + :return: An array with the newly computed values. The number of elements are the same as for the + original array. + """ + return array_apply( + data=self, + process=build_child_callback(process, parent_parameters=['x', 'index', 'label', 'context']), + context=context + ) + + @openeo_process + def array_concat(self, array2) -> ProcessBuilder: + """ + Merge two arrays + + :param self: The first array. + :param array2: The second array. + + :return: The merged array. + """ + return array_concat(array1=self, array2=array2) + + @openeo_process + def array_contains(self, value) -> ProcessBuilder: + """ + Check whether the array contains a given value + + :param self: List to find the value in. + :param value: Value to find in `data`. If the value is `null`, this process returns always `false`. + + :return: `true` if the list contains the value, false` otherwise. + """ + return array_contains(data=self, value=value) + + @openeo_process + def array_create(self=UNSET, repeat=UNSET) -> ProcessBuilder: + """ + Create an array + + :param self: A (native) array to fill the newly created array with. Defaults to an empty array. + :param repeat: The number of times the (native) array specified in `data` is repeatedly added after + each other to the new array being created. Defaults to `1`. + + :return: The newly created array. 
+ """ + return array_create(data=self, repeat=repeat) + + @openeo_process + def array_create_labeled(self, labels) -> ProcessBuilder: + """ + Create a labeled array + + :param self: An array of values to be used. + :param labels: An array of labels to be used. + + :return: The newly created labeled array. + """ + return array_create_labeled(data=self, labels=labels) + + @openeo_process + def array_element(self, index=UNSET, label=UNSET, return_nodata=UNSET) -> ProcessBuilder: + """ + Get an element from an array + + :param self: An array. + :param index: The zero-based index of the element to retrieve. + :param label: The label of the element to retrieve. Throws an `ArrayNotLabeled` exception, if the given + array is not a labeled array and this parameter is set. + :param return_nodata: By default this process throws an `ArrayElementNotAvailable` exception if the + index or label is invalid. If you want to return `null` instead, set this flag to `true`. + + :return: The value of the requested element. + """ + return array_element(data=self, index=index, label=label, return_nodata=return_nodata) + + @openeo_process + def array_filter(self, condition, context=UNSET) -> ProcessBuilder: + """ + Filter an array based on a condition + + :param self: An array. + :param condition: A condition that is evaluated against each value, index and/or label in the array. + Only the array elements for which the condition returns `true` are preserved. + :param context: Additional data to be passed to the condition. + + :return: An array filtered by the specified condition. The number of elements are less than or equal + compared to the original array. + """ + return array_filter( + data=self, + condition=build_child_callback(condition, parent_parameters=['x', 'index', 'label', 'context']), + context=context + ) + + @openeo_process + def array_find(self, value, reverse=UNSET) -> ProcessBuilder: + """ + Get the index for a value in an array + + :param self: List to find the value in. 
+ :param value: Value to find in `data`. If the value is `null`, this process returns always `null`. + :param reverse: By default, this process finds the index of the first match. To return the index of the + last match instead, set this flag to `true`. + + :return: The index of the first element with the specified value. If no element was found, `null` is + returned. + """ + return array_find(data=self, value=value, reverse=reverse) + + @openeo_process + def array_find_label(self, label) -> ProcessBuilder: + """ + Get the index for a label in a labeled array + + :param self: List to find the label in. + :param label: Label to find in `data`. + + :return: The index of the element with the specified label assigned. If no such label was found, `null` + is returned. + """ + return array_find_label(data=self, label=label) + + @openeo_process + def array_interpolate_linear(self) -> ProcessBuilder: + """ + One-dimensional linear interpolation for arrays + + :param self: An array of numbers and no-data values. If the given array is a labeled array, the labels + must have a natural/inherent label order and the process expects the labels to be sorted accordingly. + This is the default behavior in openEO for spatial and temporal dimensions. + + :return: An array with no-data values being replaced with interpolated values. If not at least 2 + numerical values are available in the array, the array stays the same. + """ + return array_interpolate_linear(data=self) + + @openeo_process + def array_labels(self) -> ProcessBuilder: + """ + Get the labels for an array + + :param self: An array. + + :return: The labels or indices as array. + """ + return array_labels(data=self) + + @openeo_process + def array_modify(self, values, index, length=UNSET) -> ProcessBuilder: + """ + Change the content of an array (remove, insert, update) + + :param self: The array to modify. + :param values: The values to insert into the `data` array. 
+ :param index: The index in the `data` array of the element to insert the value(s) before. If the index + is greater than the number of elements in the `data` array, the process throws an + `ArrayElementNotAvailable` exception. To insert after the last element, there are two options: 1. Use + the simpler processes ``array_append()`` to append a single value or ``array_concat()`` to append + multiple values. 2. Specify the number of elements in the array. You can retrieve the number of + elements with the process ``count()``, having the parameter `condition` set to `true`. + :param length: The number of elements in the `data` array to remove (or replace) starting from the + given index. If the array contains fewer elements, the process simply removes all elements up to the + end. + + :return: An array with values added, updated or removed. + """ + return array_modify(data=self, values=values, index=index, length=length) + + @openeo_process + def arsinh(self) -> ProcessBuilder: + """ + Inverse hyperbolic sine + + :param self: A number. + + :return: The computed angle in radians. + """ + return arsinh(x=self) + + @openeo_process + def artanh(self) -> ProcessBuilder: + """ + Inverse hyperbolic tangent + + :param self: A number. + + :return: The computed angle in radians. + """ + return artanh(x=self) + + @openeo_process + def atmospheric_correction(self, method, elevation_model=UNSET, options=UNSET) -> ProcessBuilder: + """ + Apply atmospheric correction + + :param self: Data cube containing multi-spectral optical top of atmosphere reflectances to be + corrected. + :param method: The atmospheric correction method to use. To get reproducible results, you have to set a + specific method. Set to `null` to allow the back-end to choose, which will improve portability, but + reduce reproducibility as you *may* get different results if you run the processes multiple times. + :param elevation_model: The digital elevation model to use. 
Set to `null` (the default) to allow the + back-end to choose, which will improve portability, but reduce reproducibility. + :param options: Proprietary options for the atmospheric correction method. Specifying proprietary + options will reduce portability. + + :return: Data cube containing bottom of atmosphere reflectances. + """ + return atmospheric_correction(data=self, method=method, elevation_model=elevation_model, options=options) + + @openeo_process + def between(self, min, max, exclude_max=UNSET) -> ProcessBuilder: + """ + Between comparison + + :param self: The value to check. + :param min: Lower boundary (inclusive) to check against. + :param max: Upper boundary (inclusive) to check against. + :param exclude_max: Exclude the upper boundary `max` if set to `true`. Defaults to `false`. + + :return: `true` if `x` is between the specified bounds, otherwise `false`. + """ + return between(x=self, min=min, max=max, exclude_max=exclude_max) + + @openeo_process + def ceil(self) -> ProcessBuilder: + """ + Round fractions up + + :param self: A number to round up. + + :return: The number rounded up. + """ + return ceil(x=self) + + @openeo_process + def climatological_normal(self, period, climatology_period=UNSET) -> ProcessBuilder: + """ + Compute climatology normals + + :param self: A data cube with exactly one temporal dimension. The data cube must span at least the + temporal interval specified in the parameter `climatology-period`. Seasonal periods may span two + consecutive years, e.g. temporal winter that includes months December, January and February. If the + required months before the actual climate period are available, the season is taken into account. If + not available, the first season is not taken into account and the seasonal mean is based on one year + less than the other seasonal normals. The incomplete season at the end of the last year is never taken + into account. + :param period: The time intervals to aggregate the average value for. 
The following pre-defined + frequencies are supported: * `day`: Day of the year * `month`: Month of the year * `climatology- + period`: The period specified in the `climatology-period`. * `season`: Three month periods of the + calendar seasons (December - February, March - May, June - August, September - November). * `tropical- + season`: Six month periods of the tropical seasons (November - April, May - October). + :param climatology_period: The climatology period as a closed temporal interval. The first element of + the array is the first year to be fully included in the temporal interval. The second element is the + last year to be fully included in the temporal interval. The default climatology period is from 1981 + until 2010 (both inclusive) right now, but this might be updated over time to what is commonly used in + climatology. If you don't want to keep your research to be reproducible, please explicitly specify a + period. + + :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference + system and resolution) remain unchanged, except for the resolution and dimension labels of the temporal + dimension. The temporal dimension has the following dimension labels: * `day`: `001` - `365` * + `month`: `01` - `12` * `climatology-period`: `climatology-period` * `season`: `djf` (December - + February), `mam` (March - May), `jja` (June - August), `son` (September - November) * `tropical- + season`: `ndjfma` (November - April), `mjjaso` (May - October) + """ + return climatological_normal(data=self, period=period, climatology_period=climatology_period) + + @openeo_process + def clip(self, min, max) -> ProcessBuilder: + """ + Clip a value between a minimum and a maximum + + :param self: A number. + :param min: Minimum value. If the value is lower than this value, the process will return the value of + this parameter. + :param max: Maximum value. 
If the value is greater than this value, the process will return the value + of this parameter. + + :return: The value clipped to the specified range. + """ + return clip(x=self, min=min, max=max) + + @openeo_process + def cloud_detection(self, method, options=UNSET) -> ProcessBuilder: + """ + Create cloud masks + + :param self: The source data cube containing multi-spectral optical top of the atmosphere (TOA) + reflectances on which to perform cloud detection. + :param method: The cloud detection method to use. To get reproducible results, you have to set a + specific method. Set to `null` to allow the back-end to choose, which will improve portability, but + reduce reproducibility as you *may* get different results if you run the processes multiple times. + :param options: Proprietary options for the cloud detection method. Specifying proprietary options will + reduce portability. + + :return: A data cube with bands for the atmospheric disturbances. Each of the masks contains values + between 0 and 1. The data cube has the same spatial and temporal dimensions as the source data cube and + a dimension that contains a dimension label for each of the supported/considered atmospheric + disturbance. + """ + return cloud_detection(data=self, method=method, options=options) + + @openeo_process + def constant(self) -> ProcessBuilder: + """ + Define a constant value + + :param self: The value of the constant. + + :return: The value of the constant. + """ + return constant(x=self) + + @openeo_process + def cos(self) -> ProcessBuilder: + """ + Cosine + + :param self: An angle in radians. + + :return: The computed cosine of `x`. + """ + return cos(x=self) + + @openeo_process + def cosh(self) -> ProcessBuilder: + """ + Hyperbolic cosine + + :param self: An angle in radians. + + :return: The computed hyperbolic cosine of `x`. 
+ """ + return cosh(x=self) + + @openeo_process + def count(self, condition=UNSET, context=UNSET) -> ProcessBuilder: + """ + Count the number of elements + + :param self: An array with elements of any data type. + :param condition: A condition consists of one or more processes, which in the end return a boolean + value. It is evaluated against each element in the array. An element is counted only if the condition + returns `true`. Defaults to count valid elements in a list (see ``is_valid()``). Setting this parameter + to boolean `true` counts all elements in the list. `false` is not a valid value for this parameter. + :param context: Additional data to be passed to the condition. + + :return: The counted number of elements. + """ + return count(data=self, condition=condition, context=context) + + @openeo_process + def create_data_cube(self) -> ProcessBuilder: + """ + Create an empty data cube + + :return: An empty data cube with no dimensions. + """ + return create_data_cube() + + @openeo_process + def cummax(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Cumulative maxima + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not and ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is set for all the following + elements. + + :return: An array with the computed cumulative maxima. + """ + return cummax(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def cummin(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Cumulative minima + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not and ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is set for all the following + elements. + + :return: An array with the computed cumulative minima. 
+ """ + return cummin(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def cumproduct(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Cumulative products + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not and ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is set for all the following + elements. + + :return: An array with the computed cumulative products. + """ + return cumproduct(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def cumsum(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Cumulative sums + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not and ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is set for all the following + elements. + + :return: An array with the computed cumulative sums. + """ + return cumsum(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def date_between(self, min, max, exclude_max=UNSET) -> ProcessBuilder: + """ + Between comparison for dates and times + + :param self: The value to check. + :param min: Lower boundary (inclusive) to check against. + :param max: Upper boundary (inclusive) to check against. + :param exclude_max: Exclude the upper boundary `max` if set to `true`. Defaults to `false`. + + :return: `true` if `x` is between the specified bounds, otherwise `false`. + """ + return date_between(x=self, min=min, max=max, exclude_max=exclude_max) + + @openeo_process + def date_difference(self, date2, unit=UNSET) -> ProcessBuilder: + """ + Computes the difference between two time instants + + :param self: The base date, optionally with a time component. + :param date2: The other date, optionally with a time component. + :param unit: The unit for the returned value. 
The following units are available: - millisecond - + second - leap seconds are ignored in computations. - minute - hour - day - month - year + + :return: Returns the difference between date1 and date2 in the given unit (seconds by default), + including a fractional part if required. For comparison purposes this means: - If `date1` < `date2`, + the returned value is positive. - If `date1` = `date2`, the returned value is 0. - If `date1` > + `date2`, the returned value is negative. + """ + return date_difference(date1=self, date2=date2, unit=unit) + + @openeo_process + def date_shift(self, value, unit) -> ProcessBuilder: + """ + Manipulates dates and times by addition or subtraction + + :param self: The date (and optionally time) to manipulate. If the given date doesn't include the time, + the process assumes that the time component is `00:00:00Z` (i.e. midnight, in UTC). The millisecond + part of the time is optional and defaults to `0` if not given. + :param value: The period of time in the unit given that is added (positive numbers) or subtracted + (negative numbers). The value `0` doesn't have any effect. + :param unit: The unit for the value given. The following pre-defined units are available: - + millisecond: Milliseconds - second: Seconds - leap seconds are ignored in computations. - minute: + Minutes - hour: Hours - day: Days - changes only the the day part of a date - week: Weeks (equivalent + to 7 days) - month: Months - year: Years Manipulations with the unit `year`, `month`, `week` or `day` + do never change the time. If any of the manipulations result in an invalid date or time, the + corresponding part is rounded down to the next valid date or time respectively. For example, adding a + month to `2020-01-31` would result in `2020-02-29`. + + :return: The manipulated date. If a time component was given in the parameter `date`, the time + component is returned with the date. 
+ """ + return date_shift(date=self, value=value, unit=unit) + + @openeo_process + def dimension_labels(self, dimension) -> ProcessBuilder: + """ + Get the dimension labels + + :param self: The data cube. + :param dimension: The name of the dimension to get the labels for. + + :return: The labels as an array. + """ + return dimension_labels(data=self, dimension=dimension) + + @openeo_process + def divide(self, y) -> ProcessBuilder: + """ + Division of two numbers + + :param self: The dividend. + :param y: The divisor. + + :return: The computed result. + """ + return divide(x=self, y=y) + + @openeo_process + def drop_dimension(self, name) -> ProcessBuilder: + """ + Remove a dimension + + :param self: The data cube to drop a dimension from. + :param name: Name of the dimension to drop. + + :return: A data cube without the specified dimension. The number of dimensions decreases by one, but + the dimension properties (name, type, labels, reference system and resolution) for all other dimensions + remain unchanged. + """ + return drop_dimension(data=self, name=name) + + @openeo_process + def e(self) -> ProcessBuilder: + """ + Euler's number (e) + + :return: The numerical value of Euler's number. + """ + return e() + + @openeo_process + def eq(self, y, delta=UNSET, case_sensitive=UNSET) -> ProcessBuilder: + """ + Equal to comparison + + :param self: First operand. + :param y: Second operand. + :param delta: Only applicable for comparing two numbers. If this optional parameter is set to a + positive non-zero number the equality of two numbers is checked against a delta value. This is + especially useful to circumvent problems with floating-point inaccuracy in machine-based computation. + This option is basically an alias for the following computation: `lte(abs(minus([x, y]), delta)` + :param case_sensitive: Only applicable for comparing two strings. Case sensitive comparison can be + disabled by setting this parameter to `false`. 
+ + :return: `true` if `x` is equal to `y`, `null` if any operand is `null`, otherwise `false`. + """ + return eq(x=self, y=y, delta=delta, case_sensitive=case_sensitive) + + @openeo_process + def exp(self) -> ProcessBuilder: + """ + Exponentiation to the base e + + :param self: The numerical exponent. + + :return: The computed value for *e* raised to the power of `p`. + """ + return exp(p=self) + + @openeo_process + def extrema(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Minimum and maximum values + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that an array with two `null` values is + returned if any value is such a value. + + :return: An array containing the minimum and maximum values for the specified numbers. The first + element is the minimum, the second element is the maximum. If the input array is empty both elements + are set to `null`. + """ + return extrema(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def filter_bands(self, bands=UNSET, wavelengths=UNSET) -> ProcessBuilder: + """ + Filter the bands by names + + :param self: A data cube with bands. + :param bands: A list of band names. Either the unique band name (metadata field `name` in bands) or one + of the common band names (metadata field `common_name` in bands). If the unique band name and the + common name conflict, the unique band name has a higher priority. The order of the specified array + defines the order of the bands in the data cube. If multiple bands match a common name, all matched + bands are included in the original order. + :param wavelengths: A list of sub-lists with each sub-list consisting of two elements. The first + element is the minimum wavelength and the second element is the maximum wavelength. Wavelengths are + specified in micrometers (μm). 
The order of the specified array defines the order of the bands in the + data cube. If multiple bands match the wavelengths, all matched bands are included in the original + order. + + :return: A data cube limited to a subset of its original bands. The dimensions and dimension properties + (name, type, labels, reference system and resolution) remain unchanged, except that the dimension of + type `bands` has less (or the same) dimension labels. + """ + return filter_bands(data=self, bands=bands, wavelengths=wavelengths) + + @openeo_process + def filter_bbox(self, extent) -> ProcessBuilder: + """ + Spatial filter using a bounding box + + :param self: A data cube. + :param extent: A bounding box, which may include a vertical axis (see `base` and `height`). + + :return: A data cube restricted to the bounding box. The dimensions and dimension properties (name, + type, labels, reference system and resolution) remain unchanged, except that the spatial dimensions + have less (or the same) dimension labels. + """ + return filter_bbox(data=self, extent=extent) + + @openeo_process + def filter_labels(self, condition, dimension, context=UNSET) -> ProcessBuilder: + """ + Filter dimension labels based on a condition + + :param self: A data cube. + :param condition: A condition that is evaluated against each dimension label in the specified + dimension. A dimension label and the corresponding data is preserved for the given dimension, if the + condition returns `true`. + :param dimension: The name of the dimension to filter on. Fails with a `DimensionNotAvailable` + exception if the specified dimension does not exist. + :param context: Additional data to be passed to the condition. + + :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference + system and resolution) remain unchanged, except that the given dimension has less (or the same) + dimension labels. 
+ """ + return filter_labels( + data=self, + condition=build_child_callback(condition, parent_parameters=['value', 'context']), + dimension=dimension, + context=context + ) + + @openeo_process + def filter_spatial(self, geometries) -> ProcessBuilder: + """ + Spatial filter raster data cubes using geometries + + :param self: A raster data cube. + :param geometries: One or more geometries used for filtering, given as GeoJSON or vector data cube. If + multiple geometries are provided, the union of them is used. Empty geometries are ignored. Limits the + data cube to the bounding box of the given geometries. No implicit masking gets applied. To mask the + pixels of the data cube use ``mask_polygon()``. + + :return: A raster data cube restricted to the specified geometries. The dimensions and dimension + properties (name, type, labels, reference system and resolution) remain unchanged, except that the + spatial dimensions have less (or the same) dimension labels. + """ + return filter_spatial(data=self, geometries=geometries) + + @openeo_process + def filter_temporal(self, extent, dimension=UNSET) -> ProcessBuilder: + """ + Temporal filter based on temporal intervals + + :param self: A data cube. + :param extent: Left-closed temporal interval, i.e. an array with exactly two elements: 1. The first + element is the start of the temporal interval. The specified time instant is **included** in the + interval. 2. The second element is the end of the temporal interval. The specified time instant is + **excluded** from the interval. The second element must always be greater/later than the first + element. Otherwise, a `TemporalExtentEmpty` exception is thrown. Also supports unbounded intervals by + setting one of the boundaries to `null`, but never both. + :param dimension: The name of the temporal dimension to filter on. If no specific dimension is + specified, the filter applies to all temporal dimensions. 
Fails with a `DimensionNotAvailable` + exception if the specified dimension does not exist. + + :return: A data cube restricted to the specified temporal extent. The dimensions and dimension + properties (name, type, labels, reference system and resolution) remain unchanged, except that the + temporal dimensions (determined by `dimensions` parameter) may have less dimension labels. + """ + return filter_temporal(data=self, extent=extent, dimension=dimension) + + @openeo_process + def filter_vector(self, geometries, relation=UNSET) -> ProcessBuilder: + """ + Spatial vector filter using geometries + + :param self: A vector data cube with the candidate geometries. + :param geometries: One or more base geometries used for filtering, given as vector data cube. If + multiple base geometries are provided, the union of them is used. + :param relation: The spatial filter predicate for comparing the geometries provided through (a) + `geometries` (base geometries) and (b) `data` (candidate geometries). + + :return: A vector data cube restricted to the specified geometries. The dimensions and dimension + properties (name, type, labels, reference system and resolution) remain unchanged, except that the + geometries dimension has less (or the same) dimension labels. + """ + return filter_vector(data=self, geometries=geometries, relation=relation) + + @openeo_process + def first(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + First element + + :param self: An array with elements of any data type. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if the first value is + such a value. + + :return: The first element of the input array. 
+ """ + return first(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def fit_curve(self, parameters, function, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Curve fitting + + :param self: A labeled array, the labels correspond to the variable `y` and the values correspond to + the variable `x`. + :param parameters: Defined the number of parameters for the model function and provides an initial + guess for them. At least one parameter is required. + :param function: The model function. It must take the parameters to fit as array through the first + argument and the independent variable `x` as the second argument. It is recommended to store the model + function as a user-defined process on the back-end to be able to re-use the model function with the + computed optimal values for the parameters afterwards. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is passed to the model function. + + :return: An array with the optimal values for the parameters. + """ + return fit_curve( + data=self, + parameters=parameters, + function=build_child_callback(function, parent_parameters=['x', 'parameters']), + ignore_nodata=ignore_nodata + ) + + @openeo_process + def flatten_dimensions(self, dimensions, target_dimension, label_separator=UNSET) -> ProcessBuilder: + """ + Combine multiple dimensions into a single dimension + + :param self: A data cube. + :param dimensions: The names of the dimension to combine. The order of the array defines the order in + which the dimension labels and values are combined (see the example in the process description). Fails + with a `DimensionNotAvailable` exception if at least one of the specified dimensions does not exist. + :param target_dimension: The name of the new target dimension. A new dimensions will be created with + the given names and type `other` (see ``add_dimension()``). 
Fails with a `TargetDimensionExists` + exception if a dimension with the specified name exists. + :param label_separator: The string that will be used as a separator for the concatenated dimension + labels. To unambiguously revert the dimension labels with the process ``unflatten_dimension()``, the + given string must not be contained in any of the dimension labels. + + :return: A data cube with the new shape. The dimension properties (name, type, labels, reference system + and resolution) for all other dimensions remain unchanged. + """ + return flatten_dimensions(data=self, dimensions=dimensions, target_dimension=target_dimension, label_separator=label_separator) + + @openeo_process + def floor(self) -> ProcessBuilder: + """ + Round fractions down + + :param self: A number to round down. + + :return: The number rounded down. + """ + return floor(x=self) + + @openeo_process + def gt(self, y) -> ProcessBuilder: + """ + Greater than comparison + + :param self: First operand. + :param y: Second operand. + + :return: `true` if `x` is strictly greater than `y` or `null` if any operand is `null`, otherwise + `false`. + """ + return gt(x=self, y=y) + + @openeo_process + def gte(self, y) -> ProcessBuilder: + """ + Greater than or equal to comparison + + :param self: First operand. + :param y: Second operand. + + :return: `true` if `x` is greater than or equal to `y`, `null` if any operand is `null`, otherwise + `false`. + """ + return gte(x=self, y=y) + + @openeo_process + def if_(self, accept, reject=UNSET) -> ProcessBuilder: + """ + If-Then-Else conditional + + :param self: A boolean value. + :param accept: A value that is returned if the boolean value is `true`. + :param reject: A value that is returned if the boolean value is **not** `true`. Defaults to `null`. + + :return: Either the `accept` or `reject` argument depending on the given boolean value. 
+ """ + return if_(value=self, accept=accept, reject=reject) + + @openeo_process + def inspect(self, message=UNSET, code=UNSET, level=UNSET) -> ProcessBuilder: + """ + Add information to the logs + + :param self: Data to log. + :param message: A message to send in addition to the data. + :param code: A label to help identify one or more log entries originating from this process in the list + of all log entries. It can help to group or filter log entries and is usually not unique. + :param level: The severity level of this message, defaults to `info`. + + :return: The data as passed to the `data` parameter without any modification. + """ + return inspect(data=self, message=message, code=code, level=level) + + @openeo_process + def int(self) -> ProcessBuilder: + """ + Integer part of a number + + :param self: A number. + + :return: Integer part of the number. + """ + return int(x=self) + + @openeo_process + def is_infinite(self) -> ProcessBuilder: + """ + Value is an infinite number + + :param self: The data to check. + + :return: `true` if the data is an infinite number, otherwise `false`. + """ + return is_infinite(x=self) + + @openeo_process + def is_nan(self) -> ProcessBuilder: + """ + Value is not a number + + :param self: The data to check. + + :return: Returns `true` for `NaN` and all non-numeric data types, otherwise returns `false`. + """ + return is_nan(x=self) + + @openeo_process + def is_nodata(self) -> ProcessBuilder: + """ + Value is a no-data value + + :param self: The data to check. + + :return: `true` if the data is a no-data value, otherwise `false`. + """ + return is_nodata(x=self) + + @openeo_process + def is_valid(self) -> ProcessBuilder: + """ + Value is valid data + + :param self: The data to check. + + :return: `true` if the data is valid, otherwise `false`. + """ + return is_valid(x=self) + + @openeo_process + def last(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Last element + + :param self: An array with elements of any data type. 
+ :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if the last value is + such a value. + + :return: The last element of the input array. + """ + return last(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def linear_scale_range(self, inputMin, inputMax, outputMin=UNSET, outputMax=UNSET) -> ProcessBuilder: + """ + Linear transformation between two ranges + + :param self: A number to transform. The number gets clipped to the bounds specified in `inputMin` and + `inputMax`. + :param inputMin: Minimum value the input can obtain. + :param inputMax: Maximum value the input can obtain. + :param outputMin: Minimum value of the desired output range. + :param outputMax: Maximum value of the desired output range. + + :return: The transformed number. + """ + return linear_scale_range(x=self, inputMin=inputMin, inputMax=inputMax, outputMin=outputMin, outputMax=outputMax) + + @openeo_process + def ln(self) -> ProcessBuilder: + """ + Natural logarithm + + :param self: A number to compute the natural logarithm for. + + :return: The computed natural logarithm. + """ + return ln(x=self) + + @openeo_process + def load_collection(self, spatial_extent, temporal_extent, bands=UNSET, properties=UNSET) -> ProcessBuilder: + """ + Load a collection + + :param self: The collection id. + :param spatial_extent: Limits the data to load from the collection to the specified bounding box or + polygons. * For raster data, the process loads the pixel into the data cube if the point at the pixel + center intersects with the bounding box or any of the polygons (as defined in the Simple Features + standard by the OGC). * For vector data, the process loads the geometry into the data cube if the + geometry is fully *within* the bounding box or any of the polygons (as defined in the Simple Features + standard by the OGC). 
Empty geometries may only be in the data cube if no spatial extent has been + provided. The GeoJSON can be one of the following feature types: * A `Polygon` or `MultiPolygon` + geometry, * a `Feature` with a `Polygon` or `MultiPolygon` geometry, or * a `FeatureCollection` + containing at least one `Feature` with `Polygon` or `MultiPolygon` geometries. * Empty geometries are + ignored. Set this parameter to `null` to set no limit for the spatial extent. Be careful with this + when loading large datasets! It is recommended to use this parameter instead of using ``filter_bbox()`` + or ``filter_spatial()`` directly after loading unbounded data. + :param temporal_extent: Limits the data to load from the collection to the specified left-closed + temporal interval. Applies to all temporal dimensions. The interval has to be specified as an array + with exactly two elements: 1. The first element is the start of the temporal interval. The specified + time instant is **included** in the interval. 2. The second element is the end of the temporal + interval. The specified time instant is **excluded** from the interval. The second element must always + be greater/later than the first element. Otherwise, a `TemporalExtentEmpty` exception is thrown. Also + supports unbounded intervals by setting one of the boundaries to `null`, but never both. Set this + parameter to `null` to set no limit for the temporal extent. Be careful with this when loading large + datasets! It is recommended to use this parameter instead of using ``filter_temporal()`` directly after + loading unbounded data. + :param bands: Only adds the specified bands into the data cube so that bands that don't match the list + of band names are not available. Applies to all dimensions of type `bands`. Either the unique band + name (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in + bands) can be specified. 
If the unique band name and the common name conflict, the unique band name has + a higher priority. The order of the specified array defines the order of the bands in the data cube. + If multiple bands match a common name, all matched bands are included in the original order. It is + recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded + data. + :param properties: Limits the data by metadata properties to include only data in the data cube which + all given conditions return `true` for (AND operation). Specify key-value-pairs with the key being the + name of the metadata property, which can be retrieved with the openEO Data Discovery for Collections. + The value must be a condition (user-defined process) to be evaluated against the collection metadata, + see the example. + + :return: A data cube for further processing. The dimensions and dimension properties (name, type, + labels, reference system and resolution) correspond to the collection's metadata, but the dimension + labels are restricted as specified in the parameters. + """ + return load_collection(id=self, spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=bands, properties=properties) + + @openeo_process + def load_geojson(self, properties=UNSET) -> ProcessBuilder: + """ + Converts GeoJSON into a vector data cube + + :param self: A GeoJSON object to convert into a vector data cube. The GeoJSON type `GeometryCollection` + is not supported. Each geometry in the GeoJSON data results in a dimension label in the `geometries` + dimension. + :param properties: A list of properties from the GeoJSON file to construct an additional dimension + from. A new dimension with the name `properties` and type `other` is created if at least one property + is provided. Only applies for GeoJSON Features and FeatureCollections. Missing values are generally set + to no-data (`null`). 
Depending on the number of properties provided, the process creates the dimension + differently: - Single property with scalar values: A single dimension label with the name of the + property and a single value per geometry. - Single property of type array: The dimension labels + correspond to the array indices. There are as many values and labels per geometry as there are for the + largest array. - Multiple properties with scalar values: The dimension labels correspond to the + property names. There are as many values and labels per geometry as there are properties provided here. + + :return: A vector data cube containing the geometries, either one or two dimensional. + """ + return load_geojson(data=self, properties=properties) + + @openeo_process + def load_ml_model(self) -> ProcessBuilder: + """ + Load a ML model + + :param self: The STAC Item to load the machine learning model from. The STAC Item must implement the + `ml-model` extension. + + :return: A machine learning model to be used with machine learning processes such as + ``predict_random_forest()``. + """ + return load_ml_model(id=self) + + @openeo_process + def load_result(self, spatial_extent=UNSET, temporal_extent=UNSET, bands=UNSET) -> ProcessBuilder: + """ + Load batch job results + + :param self: The id of a batch job with results. + :param spatial_extent: Limits the data to load from the batch job result to the specified bounding box + or polygons. * For raster data, the process loads the pixel into the data cube if the point at the + pixel center intersects with the bounding box or any of the polygons (as defined in the Simple Features + standard by the OGC). * For vector data, the process loads the geometry into the data cube of the + geometry is fully within the bounding box or any of the polygons (as defined in the Simple Features + standard by the OGC). Empty geometries may only be in the data cube if no spatial extent has been + provided. 
The GeoJSON can be one of the following feature types: * A `Polygon` or `MultiPolygon` + geometry, * a `Feature` with a `Polygon` or `MultiPolygon` geometry, or * a `FeatureCollection` + containing at least one `Feature` with `Polygon` or `MultiPolygon` geometries. Set this parameter to + `null` to set no limit for the spatial extent. Be careful with this when loading large datasets! It is + recommended to use this parameter instead of using ``filter_bbox()`` or ``filter_spatial()`` directly + after loading unbounded data. + :param temporal_extent: Limits the data to load from the batch job result to the specified left-closed + temporal interval. Applies to all temporal dimensions. The interval has to be specified as an array + with exactly two elements: 1. The first element is the start of the temporal interval. The specified + instance in time is **included** in the interval. 2. The second element is the end of the temporal + interval. The specified instance in time is **excluded** from the interval. The specified temporal + strings follow [RFC 3339](https://www.rfc-editor.org/rfc/rfc3339.html). Also supports open intervals by + setting one of the boundaries to `null`, but never both. Set this parameter to `null` to set no limit + for the temporal extent. Be careful with this when loading large datasets! It is recommended to use + this parameter instead of using ``filter_temporal()`` directly after loading unbounded data. + :param bands: Only adds the specified bands into the data cube so that bands that don't match the list + of band names are not available. Applies to all dimensions of type `bands`. Either the unique band + name (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in + bands) can be specified. If the unique band name and the common name conflict, the unique band name has + a higher priority. The order of the specified array defines the order of the bands in the data cube. 
+ If multiple bands match a common name, all matched bands are included in the original order. It is + recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded + data. + + :return: A data cube for further processing. + """ + return load_result(id=self, spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=bands) + + @openeo_process + def load_stac(self, spatial_extent=UNSET, temporal_extent=UNSET, bands=UNSET, properties=UNSET) -> ProcessBuilder: + """ + Loads data from STAC + + :param self: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a + specific STAC API Collection that allows to filter items and to download assets. This includes batch + job results, which itself are compliant to STAC. For external URLs, authentication details such as API + keys or tokens may need to be included in the URL. Batch job results can be specified in two ways: - + For Batch job results at the same back-end, a URL pointing to the corresponding batch job results + endpoint should be provided. The URL usually ends with `/jobs/{id}/results` and `{id}` is the + corresponding batch job ID. - For external results, a signed URL must be provided. Not all back-ends + support signed URLs, which are provided as a link with the link relation `canonical` in the batch job + result metadata. + :param spatial_extent: Limits the data to load to the specified bounding box or polygons. * For raster + data, the process loads the pixel into the data cube if the point at the pixel center intersects with + the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). * For + vector data, the process loads the geometry into the data cube if the geometry is fully within the + bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). Empty + geometries may only be in the data cube if no spatial extent has been provided. 
The GeoJSON can be one + of the following feature types: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a + `Polygon` or `MultiPolygon` geometry, or * a `FeatureCollection` containing at least one `Feature` with + `Polygon` or `MultiPolygon` geometries. Set this parameter to `null` to set no limit for the spatial + extent. Be careful with this when loading large datasets! It is recommended to use this parameter + instead of using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data. + :param temporal_extent: Limits the data to load to the specified left-closed temporal interval. Applies + to all temporal dimensions. The interval has to be specified as an array with exactly two elements: 1. + The first element is the start of the temporal interval. The specified instance in time is **included** + in the interval. 2. The second element is the end of the temporal interval. The specified instance in + time is **excluded** from the interval. The second element must always be greater/later than the first + element. Otherwise, a `TemporalExtentEmpty` exception is thrown. Also supports open intervals by + setting one of the boundaries to `null`, but never both. Set this parameter to `null` to set no limit + for the temporal extent. Be careful with this when loading large datasets! It is recommended to use + this parameter instead of using ``filter_temporal()`` directly after loading unbounded data. + :param bands: Only adds the specified bands into the data cube so that bands that don't match the list + of band names are not available. Applies to all dimensions of type `bands`. Either the unique band + name (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in + bands) can be specified. If the unique band name and the common name conflict, the unique band name has + a higher priority. The order of the specified array defines the order of the bands in the data cube. 
+ If multiple bands match a common name, all matched bands are included in the original order. It is + recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded + data. + :param properties: Limits the data by metadata properties to include only data in the data cube which + all given conditions return `true` for (AND operation). Specify key-value-pairs with the key being the + name of the metadata property, which can be retrieved with the openEO Data Discovery for Collections. + The value must be a condition (user-defined process) to be evaluated against a STAC API. This parameter + is not supported for static STAC. + + :return: A data cube for further processing. + """ + return load_stac(url=self, spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=bands, properties=properties) + + @openeo_process + def load_uploaded_files(self, format, options=UNSET) -> ProcessBuilder: + """ + Load files from the user workspace + + :param self: The files to read. Folders can't be specified, specify all files instead. An exception is + thrown if a file can't be read. + :param format: The file format to read from. It must be one of the values that the server reports as + supported input file formats, which usually correspond to the short GDAL/OGR codes. If the format is + not suitable for loading the data, a `FormatUnsuitable` exception will be thrown. This parameter is + *case insensitive*. + :param options: The file format parameters to be used to read the files. Must correspond to the + parameters that the server reports as supported parameters for the chosen `format`. The parameter names + and valid values usually correspond to the GDAL/OGR format options. + + :return: A data cube for further processing. 
+ """ + return load_uploaded_files(paths=self, format=format, options=options) + + @openeo_process + def load_url(self, format, options=UNSET) -> ProcessBuilder: + """ + Load data from a URL + + :param self: The URL to read from. Authentication details such as API keys or tokens may need to be + included in the URL. + :param format: The file format to use when loading the data. It must be one of the values that the + server reports as supported input file formats, which usually correspond to the short GDAL/OGR codes. + If the format is not suitable for loading the data, a `FormatUnsuitable` exception will be thrown. This + parameter is *case insensitive*. + :param options: The file format parameters to use when reading the data. Must correspond to the + parameters that the server reports as supported parameters for the chosen `format`. The parameter names + and valid values usually correspond to the GDAL/OGR format options. + + :return: A data cube for further processing. + """ + return load_url(url=self, format=format, options=options) + + @openeo_process + def log(self, base) -> ProcessBuilder: + """ + Logarithm to a base + + :param self: A number to compute the logarithm for. + :param base: The numerical base. + + :return: The computed logarithm. + """ + return log(x=self, base=base) + + @openeo_process + def lt(self, y) -> ProcessBuilder: + """ + Less than comparison + + :param self: First operand. + :param y: Second operand. + + :return: `true` if `x` is strictly less than `y`, `null` if any operand is `null`, otherwise `false`. + """ + return lt(x=self, y=y) + + @openeo_process + def lte(self, y) -> ProcessBuilder: + """ + Less than or equal to comparison + + :param self: First operand. + :param y: Second operand. + + :return: `true` if `x` is less than or equal to `y`, `null` if any operand is `null`, otherwise + `false`. 
+ """ + return lte(x=self, y=y) + + @openeo_process + def mask(self, mask, replacement=UNSET) -> ProcessBuilder: + """ + Apply a raster mask + + :param self: A raster data cube. + :param mask: A mask as a raster data cube. Every pixel in `data` must have a corresponding element in + `mask`. + :param replacement: The value used to replace masked values with. + + :return: A masked raster data cube with the same dimensions. The dimension properties (name, type, + labels, reference system and resolution) remain unchanged. + """ + return mask(data=self, mask=mask, replacement=replacement) + + @openeo_process + def mask_polygon(self, mask, replacement=UNSET, inside=UNSET) -> ProcessBuilder: + """ + Apply a polygon mask + + :param self: A raster data cube. + :param mask: A GeoJSON object or a vector data cube containing at least one polygon. The provided + vector data can be one of the following: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with + a `Polygon` or `MultiPolygon` geometry, or * a `FeatureCollection` containing at least one `Feature` + with `Polygon` or `MultiPolygon` geometries. * Empty geometries are ignored. + :param replacement: The value used to replace masked values with. + :param inside: If set to `true` all pixels for which the point at the pixel center **does** intersect + with any polygon are replaced. + + :return: A masked raster data cube with the same dimensions. The dimension properties (name, type, + labels, reference system and resolution) remain unchanged. + """ + return mask_polygon(data=self, mask=mask, replacement=replacement, inside=inside) + + @openeo_process + def max(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Maximum value + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if any value is such a + value. + + :return: The maximum value. 
+ """ + return max(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def mean(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Arithmetic mean (average) + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if any value is such a + value. + + :return: The computed arithmetic mean. + """ + return mean(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def median(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Statistical median + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if any value is such a + value. + + :return: The computed statistical median. + """ + return median(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def merge_cubes(self, cube2, overlap_resolver=UNSET, context=UNSET) -> ProcessBuilder: + """ + Merge two data cubes + + :param self: The base data cube. + :param cube2: The other data cube to be merged with the base data cube. + :param overlap_resolver: A reduction operator that resolves the conflict if the data overlaps. The + reducer must return a value of the same data type as the input values are. The reduction operator may + be a single process such as ``multiply()`` or consist of multiple sub-processes. `null` (the default) + can be specified if no overlap resolver is required. + :param context: Additional data to be passed to the overlap resolver. + + :return: The merged data cube. See the process description for details regarding the dimensions and + dimension properties (name, type, labels, reference system and resolution). 
+ """ + return merge_cubes( + cube1=self, + cube2=cube2, + overlap_resolver=(build_child_callback(overlap_resolver, parent_parameters=['x', 'y', 'context']) if overlap_resolver not in [None, UNSET] else overlap_resolver), + context=context + ) + + @openeo_process + def min(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Minimum value + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if any value is such a + value. + + :return: The minimum value. + """ + return min(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def mod(self, y) -> ProcessBuilder: + """ + Modulo + + :param self: A number to be used as the dividend. + :param y: A number to be used as the divisor. + + :return: The remainder after division. + """ + return mod(x=self, y=y) + + @openeo_process + def multiply(self, y) -> ProcessBuilder: + """ + Multiplication of two numbers + + :param self: The multiplier. + :param y: The multiplicand. + + :return: The computed product of the two numbers. + """ + return multiply(x=self, y=y) + + @openeo_process + def nan(self) -> ProcessBuilder: + """ + Not a Number (NaN) + + :return: Returns `NaN`. + """ + return nan() + + @openeo_process + def ndvi(self, nir=UNSET, red=UNSET, target_band=UNSET) -> ProcessBuilder: + """ + Normalized Difference Vegetation Index + + :param self: A raster data cube with two bands that have the common names `red` and `nir` assigned. + :param nir: The name of the NIR band. Defaults to the band that has the common name `nir` assigned. + Either the unique band name (metadata field `name` in bands) or one of the common band names (metadata + field `common_name` in bands) can be specified. If the unique band name and the common name conflict, + the unique band name has a higher priority. + :param red: The name of the red band. 
Defaults to the band that has the common name `red` assigned. + Either the unique band name (metadata field `name` in bands) or one of the common band names (metadata + field `common_name` in bands) can be specified. If the unique band name and the common name conflict, + the unique band name has a higher priority. + :param target_band: By default, the dimension of type `bands` is dropped. To keep the dimension specify + a new band name in this parameter so that a new dimension label with the specified name will be added + for the computed values. + + :return: A raster data cube containing the computed NDVI values. The structure of the data cube differs + depending on the value passed to `target_band`: * `target_band` is `null`: The data cube does not + contain the dimension of type `bands`, the number of dimensions decreases by one. The dimension + properties (name, type, labels, reference system and resolution) for all other dimensions remain + unchanged. * `target_band` is a string: The data cube keeps the same dimensions. The dimension + properties remain unchanged, but the number of dimension labels for the dimension of type `bands` + increases by one. The additional label is named as specified in `target_band`. + """ + return ndvi(data=self, nir=nir, red=red, target_band=target_band) + + @openeo_process + def neq(self, y, delta=UNSET, case_sensitive=UNSET) -> ProcessBuilder: + """ + Not equal to comparison + + :param self: First operand. + :param y: Second operand. + :param delta: Only applicable for comparing two numbers. If this optional parameter is set to a + positive non-zero number the non-equality of two numbers is checked against a delta value. This is + especially useful to circumvent problems with floating-point inaccuracy in machine-based computation. + This option is basically an alias for the following computation: `gt(abs(minus([x, y]), delta)` + :param case_sensitive: Only applicable for comparing two strings. 
Case sensitive comparison can be + disabled by setting this parameter to `false`. + + :return: `true` if `x` is *not* equal to `y`, `null` if any operand is `null`, otherwise `false`. + """ + return neq(x=self, y=y, delta=delta, case_sensitive=case_sensitive) + + @openeo_process + def normalized_difference(self, y) -> ProcessBuilder: + """ + Normalized difference + + :param self: The value for the first band. + :param y: The value for the second band. + + :return: The computed normalized difference. + """ + return normalized_difference(x=self, y=y) + + @openeo_process + def not_(self) -> ProcessBuilder: + """ + Inverting a boolean + + :param self: Boolean value to invert. + + :return: Inverted boolean value. + """ + return not_(x=self) + + @openeo_process + def or_(self, y) -> ProcessBuilder: + """ + Logical OR + + :param self: A boolean value. + :param y: A boolean value. + + :return: Boolean result of the logical OR. + """ + return or_(x=self, y=y) + + @openeo_process + def order(self, asc=UNSET, nodata=UNSET) -> ProcessBuilder: + """ + Get the order of array elements + + :param self: An array to compute the order for. + :param asc: The default sort order is ascending, with smallest values first. To sort in reverse + (descending) order, set this parameter to `false`. + :param nodata: Controls the handling of no-data values (`null`). By default, they are removed. If set + to `true`, missing values in the data are put last; if set to `false`, they are put first. + + :return: The computed permutation. + """ + return order(data=self, asc=asc, nodata=nodata) + + @openeo_process + def pi(self) -> ProcessBuilder: + """ + Pi (π) + + :return: The numerical value of Pi. + """ + return pi() + + @openeo_process + def power(self, p) -> ProcessBuilder: + """ + Exponentiation + + :param self: The numerical base. + :param p: The numerical exponent. + + :return: The computed value for `base` raised to the power of `p`. 
+ """ + return power(base=self, p=p) + + @openeo_process + def predict_curve(self, function, dimension, labels=UNSET) -> ProcessBuilder: + """ + Predict values + + :param self: A data cube with optimal values, e.g. computed by the process ``fit_curve()``. + :param function: The model function. It must take the parameters to fit as array through the first + argument and the independent variable `x` as the second argument. It is recommended to store the model + function as a user-defined process on the back-end. + :param dimension: The name of the dimension for predictions. + :param labels: The labels to predict values for. If no labels are given, predicts values only for no- + data (`null`) values in the data cube. + + :return: A data cube with the predicted values with the provided dimension `dimension` having as many + labels as provided through `labels`. + """ + return predict_curve( + parameters=self, + function=build_child_callback(function, parent_parameters=['x', 'parameters']), + dimension=dimension, + labels=labels + ) + + @openeo_process + def predict_random_forest(self, model) -> ProcessBuilder: + """ + Predict values based on a Random Forest model + + :param self: An array of numbers. + :param model: A model object that can be trained with the processes ``fit_regr_random_forest()`` + (regression) and ``fit_class_random_forest()`` (classification). + + :return: The predicted value. Returns `null` if any of the given values in the array is a no-data + value. + """ + return predict_random_forest(data=self, model=model) + + @openeo_process + def product(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Compute the product by multiplying numbers + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if any value is such a + value. 
+ + :return: The computed product of the sequence of numbers. + """ + return product(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def quantiles(self, probabilities=UNSET, q=UNSET, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Quantiles + + :param self: An array of numbers. + :param probabilities: Quantiles to calculate. Either a list of probabilities or the number of + intervals: * Provide an array with a sorted list of probabilities in ascending order to calculate + quantiles for. The probabilities must be between 0 and 1 (inclusive). If not sorted in ascending order, + an `AscendingProbabilitiesRequired` exception is thrown. * Provide an integer to specify the number of + intervals to calculate quantiles for. Calculates q-quantiles with equal-sized intervals. + :param q: Number of intervals to calculate quantiles for. Calculates q-quantiles with equal-sized + intervals. This parameter has been **deprecated**. Please use the parameter `probabilities` instead. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that an array with `null` values is returned + if any element is such a value. + + :return: An array with the computed quantiles. The list has either * as many elements as the given + list of `probabilities` had or * *`q`-1* elements. If the input array is empty the resulting array is + filled with as many `null` values as required according to the list above. See the 'Empty array' + example for an example. + """ + return quantiles(data=self, probabilities=probabilities, q=q, ignore_nodata=ignore_nodata) + + @openeo_process + def rearrange(self, order) -> ProcessBuilder: + """ + Sort an array based on a permutation + + :param self: The array to rearrange. + :param order: The permutation used for rearranging. + + :return: The rearranged array. 
+ """ + return rearrange(data=self, order=order) + + @openeo_process + def reduce_dimension(self, reducer, dimension, context=UNSET) -> ProcessBuilder: + """ + Reduce dimensions + + :param self: A data cube. + :param reducer: A reducer to apply on the specified dimension. A reducer is a single process such as + ``mean()`` or a set of processes, which computes a single value for a list of values, see the category + 'reducer' for such processes. + :param dimension: The name of the dimension over which to reduce. Fails with a `DimensionNotAvailable` + exception if the specified dimension does not exist. + :param context: Additional data to be passed to the reducer. + + :return: A data cube with the newly computed values. It is missing the given dimension, the number of + dimensions decreases by one. The dimension properties (name, type, labels, reference system and + resolution) for all other dimensions remain unchanged. + """ + return reduce_dimension( + data=self, + reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), + dimension=dimension, + context=context + ) + + @openeo_process + def reduce_spatial(self, reducer, context=UNSET) -> ProcessBuilder: + """ + Reduce spatial dimensions 'x' and 'y' + + :param self: A raster data cube. + :param reducer: A reducer to apply on the horizontal spatial dimensions. A reducer is a single process + such as ``mean()`` or a set of processes, which computes a single value for a list of values, see the + category 'reducer' for such processes. + :param context: Additional data to be passed to the reducer. + + :return: A data cube with the newly computed values. It is missing the horizontal spatial dimensions, + the number of dimensions decreases by two. The dimension properties (name, type, labels, reference + system and resolution) for all other dimensions remain unchanged. 
+ """ + return reduce_spatial(data=self, reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), context=context) + + @openeo_process + def rename_dimension(self, source, target) -> ProcessBuilder: + """ + Rename a dimension + + :param self: The data cube. + :param source: The current name of the dimension. Fails with a `DimensionNotAvailable` exception if the + specified dimension does not exist. + :param target: A new Name for the dimension. Fails with a `DimensionExists` exception if a dimension + with the specified name exists. + + :return: A data cube with the same dimensions, but the name of one of the dimensions changes. The old + name can not be referred to any longer. The dimension properties (name, type, labels, reference system + and resolution) remain unchanged. + """ + return rename_dimension(data=self, source=source, target=target) + + @openeo_process + def rename_labels(self, dimension, target, source=UNSET) -> ProcessBuilder: + """ + Rename dimension labels + + :param self: The data cube. + :param dimension: The name of the dimension to rename the labels for. + :param target: The new names for the labels. If a target dimension label already exists in the data + cube, a `LabelExists` exception is thrown. + :param source: The original names of the labels to be renamed to corresponding array elements in the + parameter `target`. It is allowed to only specify a subset of labels to rename, as long as the `target` + and `source` parameter have the same length. The order of the labels doesn't need to match the order of + the dimension labels in the data cube. By default, the array is empty so that the dimension labels in + the data cube are expected to be enumerated. If the dimension labels are not enumerated and the given + array is empty, the `LabelsNotEnumerated` exception is thrown. If one of the source dimension labels + doesn't exist, the `LabelNotAvailable` exception is thrown. 
+ + :return: The data cube with the same dimensions. The dimension properties (name, type, labels, + reference system and resolution) remain unchanged, except that for the given dimension the labels + change. The old labels can not be referred to any longer. The number of labels remains the same. + """ + return rename_labels(data=self, dimension=dimension, target=target, source=source) + + @openeo_process + def resample_cube_spatial(self, target, method=UNSET) -> ProcessBuilder: + """ + Resample the spatial dimensions to match a target data cube + + :param self: A raster data cube. + :param target: A raster data cube that describes the spatial target resolution. + :param method: Resampling method to use. The following options are available and are meant to align + with [`gdalwarp`](https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r): * `average`: average + (mean) resampling, computes the weighted average of all valid pixels * `bilinear`: bilinear resampling + * `cubic`: cubic resampling * `cubicspline`: cubic spline resampling * `lanczos`: Lanczos windowed sinc + resampling * `max`: maximum resampling, selects the maximum value from all valid pixels * `med`: median + resampling, selects the median value of all valid pixels * `min`: minimum resampling, selects the + minimum value from all valid pixels * `mode`: mode resampling, selects the value which appears most + often of all the sampled points * `near`: nearest neighbour resampling (default) * `q1`: first quartile + resampling, selects the first quartile value of all valid pixels * `q3`: third quartile resampling, + selects the third quartile value of all valid pixels * `rms`: root mean square (quadratic mean) of all + valid pixels * `sum`: compute the weighted sum of all valid pixels Valid pixels are determined based + on the function ``is_valid()``. + + :return: A raster data cube with the same dimensions.
The dimension properties (name, type, labels, + reference system and resolution) remain unchanged, except for the resolution and dimension labels of + the spatial dimensions. + """ + return resample_cube_spatial(data=self, target=target, method=method) + + @openeo_process + def resample_cube_temporal(self, target, dimension=UNSET, valid_within=UNSET) -> ProcessBuilder: + """ + Resample temporal dimensions to match a target data cube + + :param self: A data cube with one or more temporal dimensions. + :param target: A data cube that describes the temporal target resolution. + :param dimension: The name of the temporal dimension to resample, which must exist with this name in + both data cubes. If the dimension is not set or is set to `null`, the process resamples all temporal + dimensions that exist with the same names in both data cubes. The following exceptions may occur: * A + dimension is given, but it does not exist in any of the data cubes: `DimensionNotAvailable` * A + dimension is given, but one of them is not temporal: `DimensionMismatch` * No specific dimension name + is given and there are no temporal dimensions with the same name in the data: `DimensionMismatch` + :param valid_within: Setting this parameter to a numerical value enables that the process searches for + valid values within the given period of days before and after the target timestamps. Valid values are + determined based on the function ``is_valid()``. For example, the limit of `7` for the target + timestamps `2020-01-15 12:00:00` looks for a nearest neighbor after `2020-01-08 12:00:00` and before + `2020-01-22 12:00:00`. If no valid value is found within the given period, the value will be set to no- + data (`null`). + + :return: A data cube with the same dimensions and the same dimension properties (name, type, labels, + reference system and resolution) for all non-temporal dimensions. 
For the temporal dimension, the name + and type remain unchanged, but the dimension labels, resolution and reference system may change. + """ + return resample_cube_temporal(data=self, target=target, dimension=dimension, valid_within=valid_within) + + @openeo_process + def resample_spatial(self, resolution=UNSET, projection=UNSET, method=UNSET, align=UNSET) -> ProcessBuilder: + """ + Resample and warp the spatial dimensions + + :param self: A raster data cube. + :param resolution: Resamples the data cube to the target resolution, which can be specified either as + separate values for x and y or as a single value for both axes. Specified in the units of the target + projection. Doesn't change the resolution by default (`0`). + :param projection: Warps the data cube to the target projection, specified as an [EPSG + code](http://www.epsg-registry.org/) or [WKT2 CRS + string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). By default (`null`), the projection + is not changed. + :param method: Resampling method to use.
The following options are available and are meant to align + with [`gdalwarp`](https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r): * `average`: average + (mean) resampling, computes the weighted average of all valid pixels * `bilinear`: bilinear resampling + * `cubic`: cubic resampling * `cubicspline`: cubic spline resampling * `lanczos`: Lanczos windowed sinc + resampling * `max`: maximum resampling, selects the maximum value from all valid pixels * `med`: median + resampling, selects the median value of all valid pixels * `min`: minimum resampling, selects the + minimum value from all valid pixels * `mode`: mode resampling, selects the value which appears most + often of all the sampled points * `near`: nearest neighbour resampling (default) * `q1`: first quartile + resampling, selects the first quartile value of all valid pixels * `q3`: third quartile resampling, + selects the third quartile value of all valid pixels * `rms`: root mean square (quadratic mean) of all + valid pixels * `sum`: compute the weighted sum of all valid pixels Valid pixels are determined based + on the function ``is_valid()``. + :param align: Specifies to which corner of the spatial extent the new resampled data is aligned to. + + :return: A raster data cube with values warped onto the new projection. It has the same dimensions and + the same dimension properties (name, type, labels, reference system and resolution) for all non-spatial + or vertical spatial dimensions. For the horizontal spatial dimensions the name and type remain + unchanged, but reference system, labels and resolution may change depending on the given parameters. + """ + return resample_spatial(data=self, resolution=resolution, projection=projection, method=method, align=align) + + @openeo_process + def round(self, p=UNSET) -> ProcessBuilder: + """ + Round to a specified precision + + :param self: A number to round.
+ :param p: A positive number specifies the number of digits after the decimal point to round to. A + negative number means rounding to a power of ten, so for example *-2* rounds to the nearest hundred. + Defaults to *0*. + + :return: The rounded number. + """ + return round(x=self, p=p) + + @openeo_process + def run_udf(self, udf, runtime, version=UNSET, context=UNSET) -> ProcessBuilder: + """ + Run a UDF + + :param self: The data to be passed to the UDF. + :param udf: Either source code, an absolute URL or a path to a UDF script. + :param runtime: A UDF runtime identifier available at the back-end. + :param version: An UDF runtime version. If set to `null`, the default runtime version specified for + each runtime is used. + :param context: Additional data such as configuration options to be passed to the UDF. + + :return: The data processed by the UDF. The returned value can be of any data type and is exactly what + the UDF code returns. + """ + return run_udf(data=self, udf=udf, runtime=runtime, version=version, context=context) + + @openeo_process + def run_udf_externally(self, url, context=UNSET) -> ProcessBuilder: + """ + Run an externally hosted UDF container + + :param self: The data to be passed to the UDF. + :param url: Absolute URL to a remote UDF service. + :param context: Additional data such as configuration options to be passed to the UDF. + + :return: The data processed by the UDF. The returned value can in principle be of any data type, but it + depends on what is returned by the UDF code. Please see the implemented UDF interface for details. 
+ """ + return run_udf_externally(data=self, url=url, context=context) + + @openeo_process + def sar_backscatter(self, coefficient=UNSET, elevation_model=UNSET, mask=UNSET, contributing_area=UNSET, local_incidence_angle=UNSET, ellipsoid_incidence_angle=UNSET, noise_removal=UNSET, options=UNSET) -> ProcessBuilder: + """ + Computes backscatter from SAR input + + :param self: The source data cube containing SAR input. + :param coefficient: Select the radiometric correction coefficient. The following options are available: + * `beta0`: radar brightness * `sigma0-ellipsoid`: ground area computed with ellipsoid earth model * + `sigma0-terrain`: ground area computed with terrain earth model * `gamma0-ellipsoid`: ground area + computed with ellipsoid earth model in sensor line of sight * `gamma0-terrain`: ground area computed + with terrain earth model in sensor line of sight (default) * `null`: non-normalized backscatter + :param elevation_model: The digital elevation model to use. Set to `null` (the default) to allow the + back-end to choose, which will improve portability, but reduce reproducibility. + :param mask: If set to `true`, a data mask is added to the bands with the name `mask`. It indicates + which values are valid (1), invalid (0) or contain no-data (null). + :param contributing_area: If set to `true`, a DEM-based local contributing area band named + `contributing_area` is added. The values are given in square meters. + :param local_incidence_angle: If set to `true`, a DEM-based local incidence angle band named + `local_incidence_angle` is added. The values are given in degrees. + :param ellipsoid_incidence_angle: If set to `true`, an ellipsoidal incidence angle band named + `ellipsoid_incidence_angle` is added. The values are given in degrees. + :param noise_removal: If set to `false`, no noise removal is applied. Defaults to `true`, which removes + noise. + :param options: Proprietary options for the backscatter computations. 
Specifying proprietary options + will reduce portability. + + :return: Backscatter values corresponding to the chosen parametrization. The values are given in linear + scale. + """ + return sar_backscatter( + data=self, + coefficient=coefficient, + elevation_model=elevation_model, + mask=mask, + contributing_area=contributing_area, + local_incidence_angle=local_incidence_angle, + ellipsoid_incidence_angle=ellipsoid_incidence_angle, + noise_removal=noise_removal, + options=options + ) + + @openeo_process + def save_result(self, format, options=UNSET) -> ProcessBuilder: + """ + Save processed data + + :param self: The data to deliver in the given file format. + :param format: The file format to use. It must be one of the values that the server reports as + supported output file formats, which usually correspond to the short GDAL/OGR codes. This parameter is + *case insensitive*. * If the data cube is empty and the file format can't store empty data cubes, a + `DataCubeEmpty` exception is thrown. * If the file format is otherwise not suitable for storing the + underlying data structure, a `FormatUnsuitable` exception is thrown. + :param options: The file format parameters to be used to create the file(s). Must correspond to the + parameters that the server reports as supported parameters for the chosen `format`. The parameter names + and valid values usually correspond to the GDAL/OGR format options. + + :return: Always returns `true` as in case of an error an exception is thrown which aborts the execution + of the process. + """ + return save_result(data=self, format=format, options=options) + + @openeo_process + def sd(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Standard deviation + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if any value is such a + value. 
+ + :return: The computed sample standard deviation. + """ + return sd(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def sgn(self) -> ProcessBuilder: + """ + Signum + + :param self: A number. + + :return: The computed signum value of `x`. + """ + return sgn(x=self) + + @openeo_process + def sin(self) -> ProcessBuilder: + """ + Sine + + :param self: An angle in radians. + + :return: The computed sine of `x`. + """ + return sin(x=self) + + @openeo_process + def sinh(self) -> ProcessBuilder: + """ + Hyperbolic sine + + :param self: An angle in radians. + + :return: The computed hyperbolic sine of `x`. + """ + return sinh(x=self) + + @openeo_process + def sort(self, asc=UNSET, nodata=UNSET) -> ProcessBuilder: + """ + Sort data + + :param self: An array with data to sort. + :param asc: The default sort order is ascending, with smallest values first. To sort in reverse + (descending) order, set this parameter to `false`. + :param nodata: Controls the handling of no-data values (`null`). By default, they are removed. If set + to `true`, missing values in the data are put last; if set to `false`, they are put first. + + :return: The sorted array. + """ + return sort(data=self, asc=asc, nodata=nodata) + + @openeo_process + def sqrt(self) -> ProcessBuilder: + """ + Square root + + :param self: A number. + + :return: The computed square root. + """ + return sqrt(x=self) + + @openeo_process + def subtract(self, y) -> ProcessBuilder: + """ + Subtraction of two numbers + + :param self: The minuend. + :param y: The subtrahend. + + :return: The computed result. + """ + return subtract(x=self, y=y) + + @openeo_process + def sum(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Compute the sum by adding up numbers + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. 
+ Setting this flag to `false` considers no-data values so that `null` is returned if any value is such a + value. + + :return: The computed sum of the sequence of numbers. + """ + return sum(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def tan(self) -> ProcessBuilder: + """ + Tangent + + :param self: An angle in radians. + + :return: The computed tangent of `x`. + """ + return tan(x=self) + + @openeo_process + def tanh(self) -> ProcessBuilder: + """ + Hyperbolic tangent + + :param self: An angle in radians. + + :return: The computed hyperbolic tangent of `x`. + """ + return tanh(x=self) + + @openeo_process + def text_begins(self, pattern, case_sensitive=UNSET) -> ProcessBuilder: + """ + Text begins with another text + + :param self: Text in which to find something at the beginning. + :param pattern: Text to find at the beginning of `data`. Regular expressions are not supported. + :param case_sensitive: Case sensitive comparison can be disabled by setting this parameter to `false`. + + :return: `true` if `data` begins with `pattern`, `false` otherwise. + """ + return text_begins(data=self, pattern=pattern, case_sensitive=case_sensitive) + + @openeo_process + def text_concat(self, separator=UNSET) -> ProcessBuilder: + """ + Concatenate elements to a single text + + :param self: A set of elements. Numbers, boolean values and null values get converted to their (lower + case) string representation. For example: `1` (integer), `-1.5` (number), `true` / `false` (boolean + values) + :param separator: A separator to put between each of the individual texts. Defaults to an empty string. + + :return: A string containing a string representation of all the array elements in the same order, with + the separator between each element.
+ """ + return text_concat(data=self, separator=separator) + + @openeo_process + def text_contains(self, pattern, case_sensitive=UNSET) -> ProcessBuilder: + """ + Text contains another text + + :param self: Text in which to find something. + :param pattern: Text to find in `data`. Regular expressions are not supported. + :param case_sensitive: Case sensitive comparison can be disabled by setting this parameter to `false`. + + :return: `true` if `data` contains the `pattern`, `false` otherwise. + """ + return text_contains(data=self, pattern=pattern, case_sensitive=case_sensitive) + + @openeo_process + def text_ends(self, pattern, case_sensitive=UNSET) -> ProcessBuilder: + """ + Text ends with another text + + :param self: Text in which to find something at the end. + :param pattern: Text to find at the end of `data`. Regular expressions are not supported. + :param case_sensitive: Case sensitive comparison can be disabled by setting this parameter to `false`. + + :return: `true` if `data` ends with `pattern`, `false` otherwise. + """ + return text_ends(data=self, pattern=pattern, case_sensitive=case_sensitive) + + @openeo_process + def trim_cube(self) -> ProcessBuilder: + """ + Remove dimension labels with no-data values + + :param self: A data cube to trim. + + :return: A trimmed data cube with the same dimensions. The dimension properties name, type, reference + system and resolution remain unchanged. The number of dimension labels may decrease. + """ + return trim_cube(data=self) + + @openeo_process + def unflatten_dimension(self, dimension, target_dimensions, label_separator=UNSET) -> ProcessBuilder: + """ + Split a single dimension into multiple dimensions + + :param self: A data cube that is consistently structured so that operation can execute flawlessly (e.g. + the dimension labels need to contain the `label_separator` exactly 1 time for two target dimensions, 2 + times for three target dimensions etc.).
+ :param dimension: The name of the dimension to split. + :param target_dimensions: The names of the new target dimensions. New dimensions will be created with + the given names and type `other` (see ``add_dimension()``). Fails with a `TargetDimensionExists` + exception if any of the dimensions exists. The order of the array defines the order in which the + dimensions and dimension labels are added to the data cube (see the example in the process + description). + :param label_separator: The string that will be used as a separator to split the dimension labels. + + :return: A data cube with the new shape. The dimension properties (name, type, labels, reference system + and resolution) for all other dimensions remain unchanged. + """ + return unflatten_dimension(data=self, dimension=dimension, target_dimensions=target_dimensions, label_separator=label_separator) + + @openeo_process + def variance(self, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Variance + + :param self: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is returned if any value is such a + value. + + :return: The computed sample variance. + """ + return variance(data=self, ignore_nodata=ignore_nodata) + + @openeo_process + def vector_buffer(self, distance) -> ProcessBuilder: + """ + Buffer geometries by distance + + :param self: Geometries to apply the buffer on. Feature properties are preserved. + :param distance: The distance of the buffer in meters. A positive distance expands the geometries, + resulting in outward buffering (dilation), while a negative distance shrinks the geometries, resulting + in inward buffering (erosion). If the unit of the spatial reference system is not meters, a + `UnitMismatch` error is thrown. Use ``vector_reproject()`` to convert the geometries to a suitable + spatial reference system. 
+ + :return: Returns a vector data cube with the computed new geometries of which some may be empty. + """ + return vector_buffer(geometries=self, distance=distance) + + @openeo_process + def vector_reproject(self, projection, dimension=UNSET) -> ProcessBuilder: + """ + Reprojects the geometry dimension + + :param self: A vector data cube. + :param projection: Coordinate reference system to reproject to. Specified as an [EPSG + code](http://www.epsg-registry.org/) or [WKT2 CRS + string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). + :param dimension: The name of the geometry dimension to reproject. If no specific dimension is + specified, the filter applies to all geometry dimensions. Fails with a `DimensionNotAvailable` + exception if the specified dimension does not exist. + + :return: A vector data cube with geometries projected to the new coordinate reference system. The + reference system of the geometry dimension changes, all other dimensions and properties remain + unchanged. + """ + return vector_reproject(data=self, projection=projection, dimension=dimension) + + @openeo_process + def vector_to_random_points(self, geometry_count=UNSET, total_count=UNSET, group=UNSET, seed=UNSET) -> ProcessBuilder: + """ + Sample random points from geometries + + :param self: Input geometries for sample extraction. + :param geometry_count: The maximum number of points to compute per geometry. Points in the input + geometries can be selected only once by the sampling. + :param total_count: The maximum number of points to compute overall. Throws a `CountMismatch` + exception if the specified value is less than the provided number of geometries. + :param group: Specifies whether the sampled points should be grouped by input geometry (default) or be + generated as independent points. * If the sampled points are grouped, the process generates a + `MultiPoint` per geometry given which keeps the original identifier if present. 
* Otherwise, each + sampled point is generated as a distinct `Point` geometry without identifier. + :param seed: A randomization seed to use for random sampling. If not given or `null`, no seed is used + and results may differ on subsequent use. + + :return: Returns a vector data cube with the sampled points. + """ + return vector_to_random_points(data=self, geometry_count=geometry_count, total_count=total_count, group=group, seed=seed) + + @openeo_process + def vector_to_regular_points(self, distance, group=UNSET) -> ProcessBuilder: + """ + Sample regular points from geometries + + :param self: Input geometries for sample extraction. + :param distance: Defines the minimum distance in meters that is required between two samples generated + *inside* a single geometry. If the unit of the spatial reference system is not meters, a `UnitMismatch` + error is thrown. Use ``vector_reproject()`` to convert the geometries to a suitable spatial reference + system. - For **polygons**, the distance defines the cell sizes of a regular grid that starts at the + upper-left bound of each polygon. The centroid of each cell is then a sample point. If the centroid is + not enclosed in the polygon, no point is sampled. If no point can be sampled for the geometry at all, + the first coordinate of the geometry is returned as point. - For **lines** (line strings), the sampling + starts with a point at the first coordinate of the line and then walks along the line and samples a new + point each time the distance to the previous point has been reached again. - For **points**, the point + is returned as given. + :param group: Specifies whether the sampled points should be grouped by input geometry (default) or be + generated as independent points. * If the sampled points are grouped, the process generates a + `MultiPoint` per geometry given which keeps the original identifier if present. * Otherwise, each + sampled point is generated as a distinct `Point` geometry without identifier. 
+ + :return: Returns a vector data cube with the sampled points. + """ + return vector_to_regular_points(data=self, distance=distance, group=group) + + @openeo_process + def xor(self, y) -> ProcessBuilder: + """ + Logical XOR (exclusive or) + + :param self: A boolean value. + :param y: A boolean value. + + :return: Boolean result of the logical XOR. + """ + return xor(x=self, y=y) + + +# Public shortcut +process = ProcessBuilder.process +# Private shortcut that has lower chance to collide with a process argument named `process` +_process = ProcessBuilder.process + + +@openeo_process +def absolute(x) -> ProcessBuilder: + """ + Absolute value + + :param x: A number. + + :return: The computed absolute value. + """ + return _process('absolute', x=x) + + +@openeo_process +def add(x, y) -> ProcessBuilder: + """ + Addition of two numbers + + :param x: The first summand. + :param y: The second summand. + + :return: The computed sum of the two numbers. + """ + return _process('add', x=x, y=y) + + +@openeo_process +def add_dimension(data, name, label, type=UNSET) -> ProcessBuilder: + """ + Add a new dimension + + :param data: A data cube to add the dimension to. + :param name: Name for the dimension. + :param label: A dimension label. + :param type: The type of dimension, defaults to `other`. + + :return: The data cube with a newly added dimension. The new dimension has exactly one dimension label. All + other dimensions remain unchanged. + """ + return _process('add_dimension', data=data, name=name, label=label, type=type) + + +@openeo_process +def aggregate_spatial(data, geometries, reducer, target_dimension=UNSET, context=UNSET) -> ProcessBuilder: + """ + Zonal statistics for geometries + + :param data: A raster data cube with at least two spatial dimensions. The data cube implicitly gets + restricted to the bounds of the geometries as if ``filter_spatial()`` would have been used with the same + values for the corresponding parameters immediately before this process. 
+ :param geometries: Geometries for which the aggregation will be computed. Feature properties are preserved + for vector data cubes and all GeoJSON Features. One value will be computed per label in the dimension of + type `geometries`, GeoJSON `Feature` or `Geometry`. For a `FeatureCollection` multiple values will be + computed, one value per contained `Feature`. No values will be computed for empty geometries. For example, + a single value will be computed for a `MultiPolygon`, but two values will be computed for a + `FeatureCollection` containing two polygons. - For **polygons**, the process considers all pixels for + which the point at the pixel center intersects with the corresponding polygon (as defined in the Simple + Features standard by the OGC). - For **points**, the process considers the closest pixel center. - For + **lines** (line strings), the process considers all the pixels whose centers are closest to at least one + point on the line. Thus, pixels may be part of multiple geometries and be part of multiple aggregations. + No operation is applied to geometries that are outside of the bounds of the data. + :param reducer: A reducer to be applied on all values of each geometry. A reducer is a single process such + as ``mean()`` or a set of processes, which computes a single value for a list of values, see the category + 'reducer' for such processes. + :param target_dimension: By default (which is `null`), the process only computes the results and doesn't + add a new dimension. If this parameter contains a new dimension name, the computation also stores + information about the total count of pixels (valid + invalid pixels) and the number of valid pixels (see + ``is_valid()``) for each computed value. These values are added as a new dimension. The new dimension of + type `other` has the dimension labels `value`, `total_count` and `valid_count`. Fails with a + `TargetDimensionExists` exception if a dimension with the specified name exists. 
+ :param context: Additional data to be passed to the reducer. + + :return: A vector data cube with the computed results. Empty geometries still exist but without any + aggregated values (i.e. no-data). The spatial dimensions are replaced by a dimension of type 'geometries' + and if `target_dimension` is not `null`, a new dimension is added. + """ + return _process('aggregate_spatial', + data=data, + geometries=geometries, + reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), + target_dimension=target_dimension, + context=context + ) + + +@openeo_process +def aggregate_spatial_window(data, reducer, size, boundary=UNSET, align=UNSET, context=UNSET) -> ProcessBuilder: + """ + Zonal statistics for rectangular windows + + :param data: A raster data cube with exactly two horizontal spatial dimensions and an arbitrary number of + additional dimensions. The process is applied to all additional dimensions individually. + :param reducer: A reducer to be applied on the list of values, which contain all pixels covered by the + window. A reducer is a single process such as ``mean()`` or a set of processes, which computes a single + value for a list of values, see the category 'reducer' for such processes. + :param size: Window size in pixels along the horizontal spatial dimensions. The first value corresponds to + the `x` axis, the second value corresponds to the `y` axis. + :param boundary: Behavior to apply if the number of values for the axes `x` and `y` is not a multiple of + the corresponding value in the `size` parameter. Options are: - `pad` (default): pad the data cube with + the no-data value `null` to fit the required window size. - `trim`: trim the data cube to fit the required + window size. Set the parameter `align` to specify to which corner the data is aligned. + :param align: If the data requires padding or trimming (see parameter `boundary`), specifies to which + corner of the spatial extent the data is aligned to.
For example, if the data is aligned to the upper left, + the process pads/trims at the lower-right. + :param context: Additional data to be passed to the reducer. + + :return: A raster data cube with the newly computed values and the same dimensions. The resolution will + change depending on the chosen values for the `size` and `boundary` parameter. It usually decreases for the + dimensions which have the corresponding parameter `size` set to values greater than 1. The dimension + labels will be set to the coordinate at the center of the window. The other dimension properties (name, + type and reference system) remain unchanged. + """ + return _process('aggregate_spatial_window', + data=data, + reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), + size=size, + boundary=boundary, + align=align, + context=context + ) + + +@openeo_process +def aggregate_temporal(data, intervals, reducer, labels=UNSET, dimension=UNSET, context=UNSET) -> ProcessBuilder: + """ + Temporal aggregations + + :param data: A data cube. + :param intervals: Left-closed temporal intervals, which are allowed to overlap. Each temporal interval in + the array has exactly two elements: 1. The first element is the start of the temporal interval. The + specified time instant is **included** in the interval. 2. The second element is the end of the temporal + interval. The specified time instant is **excluded** from the interval. The second element must always be + greater/later than the first element, except when using time without date. Otherwise, a + `TemporalExtentEmpty` exception is thrown. + :param reducer: A reducer to be applied for the values contained in each interval. A reducer is a single + process such as ``mean()`` or a set of processes, which computes a single value for a list of values, see + the category 'reducer' for such processes. Intervals may not contain any values, which for most reducers + leads to no-data (`null`) values by default. 
+ :param labels: Distinct labels for the intervals, which can contain dates and/or times. Is only required to + be specified if the values for the start of the temporal intervals are not distinct and thus the default + labels would not be unique. The number of labels and the number of groups need to be equal. + :param dimension: The name of the temporal dimension for aggregation. All data along the dimension is + passed through the specified reducer. If the dimension is not set or set to `null`, the data cube is + expected to only have one temporal dimension. Fails with a `TooManyDimensions` exception if it has more + dimensions. Fails with a `DimensionNotAvailable` exception if the specified dimension does not exist. + :param context: Additional data to be passed to the reducer. + + :return: A new data cube with the same dimensions. The dimension properties (name, type, labels, reference + system and resolution) remain unchanged, except for the resolution and dimension labels of the given + temporal dimension. + """ + return _process('aggregate_temporal', + data=data, + intervals=intervals, + reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), + labels=labels, + dimension=dimension, + context=context + ) + + +@openeo_process +def aggregate_temporal_period(data, period, reducer, dimension=UNSET, context=UNSET) -> ProcessBuilder: + """ + Temporal aggregations based on calendar hierarchies + + :param data: The source data cube. + :param period: The time intervals to aggregate. The following pre-defined values are available: * `hour`: + Hour of the day * `day`: Day of the year * `week`: Week of the year * `dekad`: Ten day periods, counted per + year with three periods per month (day 1 - 10, 11 - 20 and 21 - end of month). The third dekad of the month + can range from 8 to 11 days. 
For example, the third dekad of a year spans from January 21 till January 31 + (11 days), the fourth dekad spans from February 1 till February 10 (10 days) and the sixth dekad spans from + February 21 till February 28 or February 29 in a leap year (8 or 9 days respectively). * `month`: Month of + the year * `season`: Three month periods of the calendar seasons (December - February, March - May, June - + August, September - November). * `tropical-season`: Six month periods of the tropical seasons (November - + April, May - October). * `year`: Proleptic years * `decade`: Ten year periods ([0-to-9 + decade](https://en.wikipedia.org/wiki/Decade#0-to-9_decade)), from a year ending in a 0 to the next year + ending in a 9. * `decade-ad`: Ten year periods ([1-to-0 + decade](https://en.wikipedia.org/wiki/Decade#1-to-0_decade)) better aligned with the anno Domini (AD) + calendar era, from a year ending in a 1 to the next year ending in a 0. + :param reducer: A reducer to be applied for the values contained in each period. A reducer is a single + process such as ``mean()`` or a set of processes, which computes a single value for a list of values, see + the category 'reducer' for such processes. Periods may not contain any values, which for most reducers + leads to no-data (`null`) values by default. + :param dimension: The name of the temporal dimension for aggregation. All data along the dimension is + passed through the specified reducer. If the dimension is not set or set to `null`, the source data cube is + expected to only have one temporal dimension. Fails with a `TooManyDimensions` exception if it has more + dimensions. Fails with a `DimensionNotAvailable` exception if the specified dimension does not exist. + :param context: Additional data to be passed to the reducer. + + :return: A new data cube with the same dimensions. 
The dimension properties (name, type, labels, reference + system and resolution) remain unchanged, except for the resolution and dimension labels of the given + temporal dimension. The specified temporal dimension has the following dimension labels (`YYYY` = four- + digit year, `MM` = two-digit month, `DD` = two-digit day of month): * `hour`: `YYYY-MM-DD-00` - `YYYY-MM- + DD-23` * `day`: `YYYY-001` - `YYYY-365` * `week`: `YYYY-01` - `YYYY-52` * `dekad`: `YYYY-00` - `YYYY-36` * + `month`: `YYYY-01` - `YYYY-12` * `season`: `YYYY-djf` (December - February), `YYYY-mam` (March - May), + `YYYY-jja` (June - August), `YYYY-son` (September - November). * `tropical-season`: `YYYY-ndjfma` (November + - April), `YYYY-mjjaso` (May - October). * `year`: `YYYY` * `decade`: `YYY0` * `decade-ad`: `YYY1` The + dimension labels in the new data cube are complete for the whole extent of the source data cube. For + example, if `period` is set to `day` and the source data cube has two dimension labels at the beginning of + the year (`2020-01-01`) and the end of a year (`2020-12-31`), the process returns a data cube with 365 + dimension labels (`2020-001`, `2020-002`, ..., `2020-365`). In contrast, if `period` is set to `day` and + the source data cube has just one dimension label `2020-01-05`, the process returns a data cube with just a + single dimension label (`2020-005`). + """ + return _process('aggregate_temporal_period', + data=data, + period=period, + reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), + dimension=dimension, + context=context + ) + + +@openeo_process +def all(data, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Are all of the values true? + + :param data: A set of boolean values. + :param ignore_nodata: Indicates whether no-data values are ignored or not and ignores them by default. + + :return: Boolean result of the logical operation.
+ """ + return _process('all', data=data, ignore_nodata=ignore_nodata) + + +@openeo_process +def and_(x, y) -> ProcessBuilder: + """ + Logical AND + + :param x: A boolean value. + :param y: A boolean value. + + :return: Boolean result of the logical AND. + """ + return _process('and', x=x, y=y) + + +@openeo_process +def anomaly(data, normals, period) -> ProcessBuilder: + """ + Compute anomalies + + :param data: A data cube with exactly one temporal dimension and the following dimension labels for the + given period (`YYYY` = four-digit year, `MM` = two-digit month, `DD` = two-digit day of month): * `hour`: + `YYYY-MM-DD-00` - `YYYY-MM-DD-23` * `day`: `YYYY-001` - `YYYY-365` * `week`: `YYYY-01` - `YYYY-52` * + `dekad`: `YYYY-00` - `YYYY-36` * `month`: `YYYY-01` - `YYYY-12` * `season`: `YYYY-djf` (December - + February), `YYYY-mam` (March - May), `YYYY-jja` (June - August), `YYYY-son` (September - November). * + `tropical-season`: `YYYY-ndjfma` (November - April), `YYYY-mjjaso` (May - October). * `year`: `YYYY` * + `decade`: `YYY0` * `decade-ad`: `YYY1` * `single-period` / `climatology-period`: Any + ``aggregate_temporal_period()`` can compute such a data cube. + :param normals: A data cube with normals, e.g. daily, monthly or yearly values computed from a process such + as ``climatological_normal()``. Must contain exactly one temporal dimension with the following dimension + labels for the given period: * `hour`: `00` - `23` * `day`: `001` - `365` * `week`: `01` - `52` * `dekad`: + `00` - `36` * `month`: `01` - `12` * `season`: `djf` (December - February), `mam` (March - May), `jja` + (June - August), `son` (September - November) * `tropical-season`: `ndjfma` (November - April), `mjjaso` + (May - October) * `year`: Four-digit year numbers * `decade`: Four-digit year numbers, the last digit being + a `0` * `decade-ad`: Four-digit year numbers, the last digit being a `1` * `single-period` / `climatology- + period`: A single dimension label with any name is expected.
+ :param period: Specifies the time intervals available in the normals data cube. The following options are + available: * `hour`: Hour of the day * `day`: Day of the year * `week`: Week of the year * `dekad`: Ten + day periods, counted per year with three periods per month (day 1 - 10, 11 - 20 and 21 - end of month). The + third dekad of the month can range from 8 to 11 days. For example, the fourth dekad is Feb, 1 - Feb, 10 + each year. * `month`: Month of the year * `season`: Three month periods of the calendar seasons (December - + February, March - May, June - August, September - November). * `tropical-season`: Six month periods of the + tropical seasons (November - April, May - October). * `year`: Proleptic years * `decade`: Ten year periods + ([0-to-9 decade](https://en.wikipedia.org/wiki/Decade#0-to-9_decade)), from a year ending in a 0 to the + next year ending in a 9. * `decade-ad`: Ten year periods ([1-to-0 + decade](https://en.wikipedia.org/wiki/Decade#1-to-0_decade)) better aligned with the anno Domini (AD) + calendar era, from a year ending in a 1 to the next year ending in a 0. * `single-period` / `climatology- + period`: A single period of arbitrary length + + :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference + system and resolution) remain unchanged. + """ + return _process('anomaly', data=data, normals=normals, period=period) + + +@openeo_process +def any(data, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Is at least one value true? + + :param data: A set of boolean values. + :param ignore_nodata: Indicates whether no-data values are ignored or not and ignores them by default. + + :return: Boolean result of the logical operation. + """ + return _process('any', data=data, ignore_nodata=ignore_nodata) + + +@openeo_process +def apply(data, process, context=UNSET) -> ProcessBuilder: + """ + Apply a process to each value + + :param data: A data cube. 
+ :param process: A process that accepts and returns a single value and is applied on each individual value + in the data cube. The process may consist of multiple sub-processes and could, for example, consist of + processes such as ``absolute()`` or ``linear_scale_range()``. + :param context: Additional data to be passed to the process. + + :return: A data cube with the newly computed values and the same dimensions. The dimension properties + (name, type, labels, reference system and resolution) remain unchanged. + """ + return _process('apply', data=data, process=build_child_callback(process, parent_parameters=['x', 'context']), context=context) + + +@openeo_process +def apply_dimension(data, process, dimension, target_dimension=UNSET, context=UNSET) -> ProcessBuilder: + """ + Apply a process to all values along a dimension + + :param data: A data cube. + :param process: Process to be applied on all values along the given dimension. The specified process needs + to accept an array and must return an array with at least one element. A process may consist of multiple + sub-processes. + :param dimension: The name of the source dimension to apply the process on. Fails with a + `DimensionNotAvailable` exception if the specified dimension does not exist. + :param target_dimension: The name of the target dimension or `null` (the default) to use the source + dimension specified in the parameter `dimension`. By specifying a target dimension, the source dimension + is removed. The target dimension with the specified name and the type `other` (see ``add_dimension()``) is + created, if it doesn't exist yet. + :param context: Additional data to be passed to the process. + + :return: A data cube with the newly computed values. All dimensions stay the same, except for the + dimensions specified in corresponding parameters. There are three cases how the dimensions can change: 1. 
+ The source dimension is the target dimension: - The (number of) dimensions remain unchanged as the + source dimension is the target dimension. - The source dimension properties name and type remain + unchanged. - The dimension labels, the reference system and the resolution are preserved only if the + number of values in the source dimension is equal to the number of values computed by the process. + Otherwise, all other dimension properties change as defined in the list below. 2. The source dimension is + not the target dimension. The target dimension exists with a single label only: - The number of + dimensions decreases by one as the source dimension is 'dropped' and the target dimension is filled with + the processed data that originates from the source dimension. - The target dimension properties name and + type remain unchanged. All other dimension properties change as defined in the list below. 3. The source + dimension is not the target dimension and the latter does not exist: - The number of dimensions remain + unchanged, but the source dimension is replaced with the target dimension. - The target dimension has + the specified name and the type other. All other dimension properties are set as defined in the list below. + Unless otherwise stated above, for the given (target) dimension the following applies: - the number of + dimension labels is equal to the number of values computed by the process, - the dimension labels are + incrementing integers starting from zero, - the resolution changes, and - the reference system is + undefined. + """ + return _process('apply_dimension', + data=data, + process=build_child_callback(process, parent_parameters=['data', 'context']), + dimension=dimension, + target_dimension=target_dimension, + context=context + ) + + +@openeo_process +def apply_kernel(data, kernel, factor=UNSET, border=UNSET, replace_invalid=UNSET) -> ProcessBuilder: + """ + Apply a spatial convolution with a kernel + + :param data: A raster data cube. 
+ :param kernel: Kernel as a two-dimensional array of weights. The inner level of the nested array aligns + with the `x` axis and the outer level aligns with the `y` axis. Each level of the kernel must have an + uneven number of elements, otherwise the process throws a `KernelDimensionsUneven` exception. + :param factor: A factor that is multiplied to each value after the kernel has been applied. This is + basically a shortcut for explicitly multiplying each value by a factor afterwards, which is often required + for some kernel-based algorithms such as the Gaussian blur. + :param border: Determines how the data is extended when the kernel overlaps with the borders. Defaults to + fill the border with zeroes. The following options are available: * *numeric value* - fill with a user- + defined constant number `n`: `nnnnnn|abcdefgh|nnnnnn` (default, with `n` = 0) * `replicate` - repeat the + value from the pixel at the border: `aaaaaa|abcdefgh|hhhhhh` * `reflect` - mirror/reflect from the border: + `fedcba|abcdefgh|hgfedc` * `reflect_pixel` - mirror/reflect from the center of the pixel at the border: + `gfedcb|abcdefgh|gfedcb` * `wrap` - repeat/wrap the image: `cdefgh|abcdefgh|abcdef` + :param replace_invalid: This parameter specifies the value to replace non-numerical or infinite numerical + values with. By default, those values are replaced with zeroes. + + :return: A data cube with the newly computed values and the same dimensions. The dimension properties + (name, type, labels, reference system and resolution) remain unchanged. + """ + return _process('apply_kernel', data=data, kernel=kernel, factor=factor, border=border, replace_invalid=replace_invalid) + + +@openeo_process +def apply_neighborhood(data, process, size, overlap=UNSET, context=UNSET) -> ProcessBuilder: + """ + Apply a process to pixels in a n-dimensional neighborhood + + :param data: A raster data cube. + :param process: Process to be applied on all neighborhoods. 
+ :param size: Neighborhood sizes along each dimension. This object maps dimension names to either a + physical measure (e.g. 100 m, 10 days) or pixels (e.g. 32 pixels). For dimensions not specified, the + default is to provide all values. Be aware that including all values from overly large dimensions may not + be processed at once. + :param overlap: Overlap of neighborhoods along each dimension to avoid border effects. By default no + overlap is provided. For instance a temporal dimension can add 1 month before and after a neighborhood. In + the spatial dimensions, this is often a number of pixels. The overlap specified is added before and after, + so an overlap of 8 pixels will add 8 pixels on both sides of the window, so 16 in total. Be aware that + large overlaps increase the need for computational resources and modifying overlapping data in subsequent + operations have no effect. + :param context: Additional data to be passed to the process. + + :return: A raster data cube with the newly computed values and the same dimensions. The dimension + properties (name, type, labels, reference system and resolution) remain unchanged. + """ + return _process('apply_neighborhood', + data=data, + process=build_child_callback(process, parent_parameters=['data', 'context']), + size=size, + overlap=overlap, + context=context + ) + + +@openeo_process +def apply_polygon(data, polygons, process, mask_value=UNSET, context=UNSET) -> ProcessBuilder: + """ + Apply a process to segments of the data cube + + :param data: A data cube. + :param polygons: A vector data cube containing at least one polygon. The provided vector data can be one of + the following: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a `Polygon` or `MultiPolygon` + geometry, or * a `FeatureCollection` containing at least one `Feature` with `Polygon` or `MultiPolygon` + geometries. * Empty geometries are ignored. 
+ :param process: A process that accepts and returns a single data cube and is applied on each individual sub + data cube. The process may consist of multiple sub-processes. + :param mask_value: All pixels for which the point at the pixel center **does not** intersect with the + polygon are replaced with the given value, which defaults to `null` (no data). It can provide a + distinction between no data values within the polygon and masked pixels outside of it. + :param context: Additional data to be passed to the process. + + :return: A data cube with the newly computed values and the same dimensions. The dimension properties + (name, type, labels, reference system and resolution) remain unchanged. + """ + return _process('apply_polygon', + data=data, + polygons=polygons, + process=build_child_callback(process, parent_parameters=['data', 'context']), + mask_value=mask_value, + context=context + ) + + +@openeo_process +def arccos(x) -> ProcessBuilder: + """ + Inverse cosine + + :param x: A number. + + :return: The computed angle in radians. + """ + return _process('arccos', x=x) + + +@openeo_process +def arcosh(x) -> ProcessBuilder: + """ + Inverse hyperbolic cosine + + :param x: A number. + + :return: The computed angle in radians. + """ + return _process('arcosh', x=x) + + +@openeo_process +def arcsin(x) -> ProcessBuilder: + """ + Inverse sine + + :param x: A number. + + :return: The computed angle in radians. + """ + return _process('arcsin', x=x) + + +@openeo_process +def arctan(x) -> ProcessBuilder: + """ + Inverse tangent + + :param x: A number. + + :return: The computed angle in radians. + """ + return _process('arctan', x=x) + + +@openeo_process +def arctan2(y, x) -> ProcessBuilder: + """ + Inverse tangent of two numbers + + :param y: A number to be used as the dividend. + :param x: A number to be used as the divisor. + + :return: The computed angle in radians. 
+ """ + return _process('arctan2', y=y, x=x) + + +@openeo_process +def ard_normalized_radar_backscatter(data, elevation_model=UNSET, contributing_area=UNSET, ellipsoid_incidence_angle=UNSET, noise_removal=UNSET, options=UNSET) -> ProcessBuilder: + """ + CARD4L compliant SAR NRB generation + + :param data: The source data cube containing SAR input. + :param elevation_model: The digital elevation model to use. Set to `null` (the default) to allow the back- + end to choose, which will improve portability, but reduce reproducibility. + :param contributing_area: If set to `true`, a DEM-based local contributing area band named + `contributing_area` is added. The values are given in square meters. + :param ellipsoid_incidence_angle: If set to `true`, an ellipsoidal incidence angle band named + `ellipsoid_incidence_angle` is added. The values are given in degrees. + :param noise_removal: If set to `false`, no noise removal is applied. Defaults to `true`, which removes + noise. + :param options: Proprietary options for the backscatter computations. Specifying proprietary options will + reduce portability. + + :return: Backscatter values expressed as gamma0 in linear scale. In addition to the bands + `contributing_area` and `ellipsoid_incidence_angle` that can optionally be added with corresponding + parameters, the following bands are always added to the data cube: - `mask`: A data mask that indicates + which values are valid (1), invalid (0) or contain no-data (null). - `local_incidence_angle`: A band with + DEM-based local incidence angles in degrees. The data returned is CARD4L compliant with corresponding + metadata. 
+ """ + return _process('ard_normalized_radar_backscatter', + data=data, + elevation_model=elevation_model, + contributing_area=contributing_area, + ellipsoid_incidence_angle=ellipsoid_incidence_angle, + noise_removal=noise_removal, + options=options + ) + + +@openeo_process +def ard_surface_reflectance(data, atmospheric_correction_method, cloud_detection_method, elevation_model=UNSET, atmospheric_correction_options=UNSET, cloud_detection_options=UNSET) -> ProcessBuilder: + """ + CARD4L compliant Surface Reflectance generation + + :param data: The source data cube containing multi-spectral optical top of the atmosphere (TOA) + reflectances. There must be a single dimension of type `bands` available. + :param atmospheric_correction_method: The atmospheric correction method to use. + :param cloud_detection_method: The cloud detection method to use. Each method supports detecting different + atmospheric disturbances such as clouds, cloud shadows, aerosols, haze, ozone and/or water vapour in + optical imagery. + :param elevation_model: The digital elevation model to use. Set to `null` (the default) to allow the back- + end to choose, which will improve portability, but reduce reproducibility. + :param atmospheric_correction_options: Proprietary options for the atmospheric correction method. + Specifying proprietary options will reduce portability. + :param cloud_detection_options: Proprietary options for the cloud detection method. Specifying proprietary + options will reduce portability. + + :return: Data cube containing bottom of atmosphere reflectances for each spectral band in the source data + cube, with atmospheric disturbances like clouds and cloud shadows removed. No-data values (null) are + directly set in the bands. 
Depending on the methods used, several additional bands will be added to the + data cube: Data cube containing bottom of atmosphere reflectances for each spectral band in the source + data cube, with atmospheric disturbances like clouds and cloud shadows removed. Depending on the methods + used, several additional bands will be added to the data cube: - `date` (optional): Specifies per-pixel + acquisition timestamps. - `incomplete-testing` (required): Identifies pixels with a value of 1 for which + the per-pixel tests (at least saturation, cloud and cloud shadows, see CARD4L specification for details) + have not all been successfully completed. Otherwise, the value is 0. - `saturation` (required) / + `saturation_{band}` (optional): Indicates where pixels in the input spectral bands are saturated (1) or not + (0). If the saturation is given per band, the band names are `saturation_{band}` with `{band}` being the + band name from the source data cube. - `cloud`, `shadow` (both required), `aerosol`, `haze`, `ozone`, + `water_vapor` (all optional): Indicates the probability of pixels being an atmospheric disturbance such as + clouds. All bands have values between 0 (clear) and 1, which describes the probability that it is an + atmospheric disturbance. - `snow-ice` (optional): Points to a file that indicates whether a pixel is + assessed as being snow/ice (1) or not (0). All values describe the probability and must be between 0 and 1. + - `land-water` (optional): Indicates whether a pixel is assessed as being land (1) or water (0). All values + describe the probability and must be between 0 and 1. - `incidence-angle` (optional): Specifies per-pixel + incidence angles in degrees. - `azimuth` (optional): Specifies per-pixel azimuth angles in degrees. - + `sun-azimuth` (optional): Specifies per-pixel sun azimuth angles in degrees. - `sun-elevation` (optional): + Specifies per-pixel sun elevation angles in degrees.
- `terrain-shadow` (optional): Indicates with a value + of 1 whether a pixel is not directly illuminated due to terrain shadowing. Otherwise, the value is 0. - + `terrain-occlusion` (optional): Indicates with a value of 1 whether a pixel is not visible to the sensor + due to terrain occlusion during off-nadir viewing. Otherwise, the value is 0. - `terrain-illumination` + (optional): Contains the coefficients used for terrain illumination correction for each pixel. + The data returned is CARD4L compliant with corresponding metadata. + """ + return _process('ard_surface_reflectance', + data=data, + atmospheric_correction_method=atmospheric_correction_method, + cloud_detection_method=cloud_detection_method, + elevation_model=elevation_model, + atmospheric_correction_options=atmospheric_correction_options, + cloud_detection_options=cloud_detection_options + ) + + +@openeo_process +def array_append(data, value, label=UNSET) -> ProcessBuilder: + """ + Append a value to an array + + :param data: An array. + :param value: Value to append to the array. + :param label: If the given array is a labeled array, a new label for the new value should be given. If not + given or `null`, the array index as string is used as the label. If in any case the label exists, a + `LabelExists` exception is thrown. + + :return: The new array with the value being appended. + """ + return _process('array_append', data=data, value=value, label=label) + + +@openeo_process +def array_apply(data, process, context=UNSET) -> ProcessBuilder: + """ + Apply a process to each array element + + :param data: An array. + :param process: A process that accepts and returns a single value and is applied on each individual value + in the array. The process may consist of multiple sub-processes and could, for example, consist of + processes such as ``absolute()`` or ``linear_scale_range()``. + :param context: Additional data to be passed to the process.
+ + :return: An array with the newly computed values. The number of elements is the same as for the original + array. + """ + return _process('array_apply', + data=data, + process=build_child_callback(process, parent_parameters=['x', 'index', 'label', 'context']), + context=context + ) + + +@openeo_process +def array_concat(array1, array2) -> ProcessBuilder: + """ + Merge two arrays + + :param array1: The first array. + :param array2: The second array. + + :return: The merged array. + """ + return _process('array_concat', array1=array1, array2=array2) + + +@openeo_process +def array_contains(data, value) -> ProcessBuilder: + """ + Check whether the array contains a given value + + :param data: List to find the value in. + :param value: Value to find in `data`. If the value is `null`, this process returns always `false`. + + :return: `true` if the list contains the value, `false` otherwise. + """ + return _process('array_contains', data=data, value=value) + + +@openeo_process +def array_create(data=UNSET, repeat=UNSET) -> ProcessBuilder: + """ + Create an array + + :param data: A (native) array to fill the newly created array with. Defaults to an empty array. + :param repeat: The number of times the (native) array specified in `data` is repeatedly added after each + other to the new array being created. Defaults to `1`. + + :return: The newly created array. + """ + return _process('array_create', data=data, repeat=repeat) + + +@openeo_process +def array_create_labeled(data, labels) -> ProcessBuilder: + """ + Create a labeled array + + :param data: An array of values to be used. + :param labels: An array of labels to be used. + + :return: The newly created labeled array. + """ + return _process('array_create_labeled', data=data, labels=labels) + + +@openeo_process +def array_element(data, index=UNSET, label=UNSET, return_nodata=UNSET) -> ProcessBuilder: + """ + Get an element from an array + + :param data: An array.
@openeo_process
def array_filter(data, condition, context=UNSET) -> ProcessBuilder:
    """
    Filter an array based on a condition

    :param data: An array.
    :param condition: The condition evaluated against each value, index and/or label in the array. Only
        the array elements for which the condition returns `true` are preserved.
    :param context: Additional data to be passed to the condition.

    :return: The array filtered by the specified condition. The number of elements is less than or equal
        to the number of elements in the original array.
    """
    # Wrap the user-supplied condition before handing it to the process call.
    condition_callback = build_child_callback(condition, parent_parameters=['x', 'index', 'label', 'context'])
    return _process('array_filter', data=data, condition=condition_callback, context=context)
@openeo_process
def array_find_label(data, label) -> ProcessBuilder:
    """
    Get the index for a label in a labeled array

    :param data: The list to search for the label.
    :param label: The label to look for in `data`.

    :return: The index of the element that has the specified label assigned, or `null` if no such label
        was found.
    """
    arguments = {'data': data, 'label': label}
    return _process('array_find_label', **arguments)
@openeo_process
def arsinh(x) -> ProcessBuilder:
    """
    Inverse hyperbolic sine

    :param x: A number.

    :return: The computed angle in radians.
    """
    arguments = {'x': x}
    return _process('arsinh', **arguments)
@openeo_process
def climatological_normal(data, period, climatology_period=UNSET) -> ProcessBuilder:
    """
    Compute climatology normals

    :param data: A data cube with exactly one temporal dimension. The data cube must span at least the
        temporal interval specified in the parameter `climatology_period`. Seasonal periods may span two
        consecutive years, e.g. temporal winter that includes months December, January and February. If the
        required months before the actual climate period are available, the season is taken into account. If
        not available, the first season is not taken into account and the seasonal mean is based on one year
        less than the other seasonal normals. The incomplete season at the end of the last year is never
        taken into account.
    :param period: The time intervals to aggregate the average value for. The following pre-defined
        frequencies are supported: * `day`: Day of the year * `month`: Month of the year *
        `climatology-period`: The period specified in the parameter `climatology_period`. * `season`: Three
        month periods of the calendar seasons (December - February, March - May, June - August, September -
        November). * `tropical-season`: Six month periods of the tropical seasons (November - April, May -
        October).
    :param climatology_period: The climatology period as a closed temporal interval. The first element of
        the array is the first year to be fully included in the temporal interval. The second element is the
        last year to be fully included in the temporal interval. The default climatology period is from 1981
        until 2010 (both inclusive) right now, but this might be updated over time to what is commonly used
        in climatology. If you want to keep your research reproducible, please explicitly specify a
        period.

    :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference
        system and resolution) remain unchanged, except for the resolution and dimension labels of the
        temporal dimension. The temporal dimension has the following dimension labels: * `day`: `001` - `365`
        * `month`: `01` - `12` * `climatology-period`: `climatology-period` * `season`: `djf` (December -
        February), `mam` (March - May), `jja` (June - August), `son` (September - November) *
        `tropical-season`: `ndjfma` (November - April), `mjjaso` (May - October)
    """
    arguments = {
        'data': data,
        'period': period,
        'climatology_period': climatology_period,
    }
    return _process('climatological_normal', **arguments)
@openeo_process
def cloud_detection(data, method, options=UNSET) -> ProcessBuilder:
    """
    Create cloud masks

    :param data: The source data cube containing multi-spectral optical top of the atmosphere (TOA)
        reflectances on which to perform cloud detection.
    :param method: The cloud detection method to use. To get reproducible results, a specific method has to
        be set. Set to `null` to allow the back-end to choose, which will improve portability, but reduce
        reproducibility as you *may* get different results if you run the processes multiple times.
    :param options: Proprietary options for the cloud detection method. Specifying proprietary options will
        reduce portability.

    :return: A data cube with bands for the atmospheric disturbances. Each of the masks contains values
        between 0 and 1. The data cube has the same spatial and temporal dimensions as the source data cube
        and a dimension that contains a dimension label for each of the supported/considered atmospheric
        disturbances.
    """
    arguments = {'data': data, 'method': method, 'options': options}
    return _process('cloud_detection', **arguments)
@openeo_process
def create_data_cube() -> ProcessBuilder:
    """
    Create an empty data cube

    :return: An empty data cube with no dimensions.
    """
    # The process takes no arguments, so only the process id is passed.
    return _process('create_data_cube')
@openeo_process
def date_between(x, min, max, exclude_max=UNSET) -> ProcessBuilder:
    """
    Between comparison for dates and times

    :param x: The value to check.
    :param min: The lower boundary (inclusive) to check against.
    :param max: The upper boundary (inclusive) to check against.
    :param exclude_max: If set to `true`, the upper boundary `max` is excluded. Defaults to `false`.

    :return: `true` if `x` is between the specified bounds, otherwise `false`.
    """
    arguments = {'x': x, 'min': min, 'max': max, 'exclude_max': exclude_max}
    return _process('date_between', **arguments)
@openeo_process
def date_shift(date, value, unit) -> ProcessBuilder:
    """
    Manipulates dates and times by addition or subtraction

    :param date: The date (and optionally time) to manipulate. If the given date doesn't include the time,
        the process assumes that the time component is `00:00:00Z` (i.e. midnight, in UTC). The millisecond
        part of the time is optional and defaults to `0` if not given.
    :param value: The period of time in the unit given that is added (positive numbers) or subtracted
        (negative numbers). The value `0` doesn't have any effect.
    :param unit: The unit for the value given. The following pre-defined units are available: -
        millisecond: Milliseconds - second: Seconds - leap seconds are ignored in computations. - minute:
        Minutes - hour: Hours - day: Days - changes only the day part of a date - week: Weeks (equivalent to
        7 days) - month: Months - year: Years Manipulations with the unit `year`, `month`, `week` or `day`
        never change the time. If any of the manipulations result in an invalid date or time, the
        corresponding part is rounded down to the next valid date or time respectively. For example, adding
        a month to `2020-01-31` would result in `2020-02-29`.

    :return: The manipulated date. If a time component was given in the parameter `date`, the time
        component is returned with the date.
    """
    arguments = {'date': date, 'value': value, 'unit': unit}
    return _process('date_shift', **arguments)
@openeo_process
def extrema(data, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Minimum and maximum values

    :param data: An array of numbers.
    :param ignore_nodata: Indicates whether no-data values are ignored or not. They are ignored by default.
        Setting this flag to `false` considers no-data values so that an array with two `null` values is
        returned if any value is such a value.

    :return: An array containing the minimum and maximum values for the specified numbers. The first
        element is the minimum, the second element is the maximum. If the input array is empty, both
        elements are set to `null`.
    """
    arguments = {'data': data, 'ignore_nodata': ignore_nodata}
    return _process('extrema', **arguments)
@openeo_process
def filter_bbox(data, extent) -> ProcessBuilder:
    """
    Spatial filter using a bounding box

    :param data: A data cube.
    :param extent: A bounding box, which may include a vertical axis (see `base` and `height`).

    :return: A data cube restricted to the bounding box. The dimensions and dimension properties (name,
        type, labels, reference system and resolution) remain unchanged, except that the spatial dimensions
        have fewer (or the same) dimension labels.
    """
    arguments = {'data': data, 'extent': extent}
    return _process('filter_bbox', **arguments)
@openeo_process
def filter_temporal(data, extent, dimension=UNSET) -> ProcessBuilder:
    """
    Temporal filter based on temporal intervals

    :param data: A data cube.
    :param extent: Left-closed temporal interval, i.e. an array with exactly two elements: 1. The first
        element is the start of the temporal interval. The specified time instant is **included** in the
        interval. 2. The second element is the end of the temporal interval. The specified time instant is
        **excluded** from the interval. The second element must always be greater/later than the first
        element. Otherwise, a `TemporalExtentEmpty` exception is thrown. Also supports unbounded intervals
        by setting one of the boundaries to `null`, but never both.
    :param dimension: The name of the temporal dimension to filter on. If no specific dimension is
        specified, the filter applies to all temporal dimensions. Fails with a `DimensionNotAvailable`
        exception if the specified dimension does not exist.

    :return: A data cube restricted to the specified temporal extent. The dimensions and dimension
        properties (name, type, labels, reference system and resolution) remain unchanged, except that the
        temporal dimensions (determined by the `dimension` parameter) may have fewer dimension labels.
    """
    arguments = {'data': data, 'extent': extent, 'dimension': dimension}
    return _process('filter_temporal', **arguments)
@openeo_process
def fit_curve(data, parameters, function, ignore_nodata=UNSET) -> ProcessBuilder:
    """
    Curve fitting

    :param data: A labeled array, the labels correspond to the variable `y` and the values correspond to
        the variable `x`.
    :param parameters: Defines the number of parameters for the model function and provides an initial
        guess for them. At least one parameter is required.
    :param function: The model function. It must take the parameters to fit as array through the first
        argument and the independent variable `x` as the second argument. It is recommended to store the
        model function as a user-defined process on the back-end to be able to re-use the model function
        with the computed optimal values for the parameters afterwards.
    :param ignore_nodata: Indicates whether no-data values are ignored or not. They are ignored by default.
        Setting this flag to `false` considers no-data values so that `null` is passed to the model
        function.

    :return: An array with the optimal values for the parameters.
    """
    # Wrap the user-supplied model function before handing it to the process call.
    model_callback = build_child_callback(function, parent_parameters=['x', 'parameters'])
    return _process(
        'fit_curve',
        data=data,
        parameters=parameters,
        function=model_callback,
        ignore_nodata=ignore_nodata,
    )
@openeo_process
def gt(x, y) -> ProcessBuilder:
    """
    Greater than comparison

    :param x: First operand.
    :param y: Second operand.

    :return: `true` if `x` is strictly greater than `y` or `null` if any operand is `null`, otherwise
        `false`.
    """
    arguments = {'x': x, 'y': y}
    return _process('gt', **arguments)
@openeo_process
def inspect(data, message=UNSET, code=UNSET, level=UNSET) -> ProcessBuilder:
    """
    Add information to the logs

    :param data: The data to log.
    :param message: A message to send in addition to the data.
    :param code: A label that helps to identify one or more log entries originating from this process in the
        list of all log entries. It can help to group or filter log entries and is usually not unique.
    :param level: The severity level of this message, defaults to `info`.

    :return: The data as passed to the `data` parameter without any modification.
    """
    arguments = {'data': data, 'message': message, 'code': code, 'level': level}
    return _process('inspect', **arguments)
+ """ + return _process('is_valid', x=x) + + +@openeo_process +def last(data, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Last element + + :param data: An array with elements of any data type. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. Setting + this flag to `false` considers no-data values so that `null` is returned if the last value is such a value. + + :return: The last element of the input array. + """ + return _process('last', data=data, ignore_nodata=ignore_nodata) + + +@openeo_process +def linear_scale_range(x, inputMin, inputMax, outputMin=UNSET, outputMax=UNSET) -> ProcessBuilder: + """ + Linear transformation between two ranges + + :param x: A number to transform. The number gets clipped to the bounds specified in `inputMin` and + `inputMax`. + :param inputMin: Minimum value the input can obtain. + :param inputMax: Maximum value the input can obtain. + :param outputMin: Minimum value of the desired output range. + :param outputMax: Maximum value of the desired output range. + + :return: The transformed number. + """ + return _process('linear_scale_range', x=x, inputMin=inputMin, inputMax=inputMax, outputMin=outputMin, outputMax=outputMax) + + +@openeo_process +def ln(x) -> ProcessBuilder: + """ + Natural logarithm + + :param x: A number to compute the natural logarithm for. + + :return: The computed natural logarithm. + """ + return _process('ln', x=x) + + +@openeo_process +def load_collection(id, spatial_extent, temporal_extent, bands=UNSET, properties=UNSET) -> ProcessBuilder: + """ + Load a collection + + :param id: The collection id. + :param spatial_extent: Limits the data to load from the collection to the specified bounding box or + polygons. * For raster data, the process loads the pixel into the data cube if the point at the pixel + center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard + by the OGC). 
* For vector data, the process loads the geometry into the data cube if the geometry is fully + *within* the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). + Empty geometries may only be in the data cube if no spatial extent has been provided. The GeoJSON can be + one of the following feature types: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a + `Polygon` or `MultiPolygon` geometry, or * a `FeatureCollection` containing at least one `Feature` with + `Polygon` or `MultiPolygon` geometries. * Empty geometries are ignored. Set this parameter to `null` to + set no limit for the spatial extent. Be careful with this when loading large datasets! It is recommended to + use this parameter instead of using ``filter_bbox()`` or ``filter_spatial()`` directly after loading + unbounded data. + :param temporal_extent: Limits the data to load from the collection to the specified left-closed temporal + interval. Applies to all temporal dimensions. The interval has to be specified as an array with exactly two + elements: 1. The first element is the start of the temporal interval. The specified time instant is + **included** in the interval. 2. The second element is the end of the temporal interval. The specified time + instant is **excluded** from the interval. The second element must always be greater/later than the first + element. Otherwise, a `TemporalExtentEmpty` exception is thrown. Also supports unbounded intervals by + setting one of the boundaries to `null`, but never both. Set this parameter to `null` to set no limit for + the temporal extent. Be careful with this when loading large datasets! It is recommended to use this + parameter instead of using ``filter_temporal()`` directly after loading unbounded data. + :param bands: Only adds the specified bands into the data cube so that bands that don't match the list of + band names are not available. Applies to all dimensions of type `bands`. 
Either the unique band name + (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in bands) + can be specified. If the unique band name and the common name conflict, the unique band name has a higher + priority. The order of the specified array defines the order of the bands in the data cube. If multiple + bands match a common name, all matched bands are included in the original order. It is recommended to use + this parameter instead of using ``filter_bands()`` directly after loading unbounded data. + :param properties: Limits the data by metadata properties to include only data in the data cube which all + given conditions return `true` for (AND operation). Specify key-value-pairs with the key being the name of + the metadata property, which can be retrieved with the openEO Data Discovery for Collections. The value + must be a condition (user-defined process) to be evaluated against the collection metadata, see the + example. + + :return: A data cube for further processing. The dimensions and dimension properties (name, type, labels, + reference system and resolution) correspond to the collection's metadata, but the dimension labels are + restricted as specified in the parameters. + """ + return _process('load_collection', id=id, spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=bands, properties=properties) + + +@openeo_process +def load_geojson(data, properties=UNSET) -> ProcessBuilder: + """ + Converts GeoJSON into a vector data cube + + :param data: A GeoJSON object to convert into a vector data cube. The GeoJSON type `GeometryCollection` is + not supported. Each geometry in the GeoJSON data results in a dimension label in the `geometries` + dimension. + :param properties: A list of properties from the GeoJSON file to construct an additional dimension from. A + new dimension with the name `properties` and type `other` is created if at least one property is provided. 
+ Only applies for GeoJSON Features and FeatureCollections. Missing values are generally set to no-data + (`null`). Depending on the number of properties provided, the process creates the dimension differently: + - Single property with scalar values: A single dimension label with the name of the property and a single + value per geometry. - Single property of type array: The dimension labels correspond to the array indices. + There are as many values and labels per geometry as there are for the largest array. - Multiple properties + with scalar values: The dimension labels correspond to the property names. There are as many values and + labels per geometry as there are properties provided here. + + :return: A vector data cube containing the geometries, either one or two dimensional. + """ + return _process('load_geojson', data=data, properties=properties) + + +@openeo_process +def load_ml_model(id) -> ProcessBuilder: + """ + Load a ML model + + :param id: The STAC Item to load the machine learning model from. The STAC Item must implement the + `ml-model` extension. + + :return: A machine learning model to be used with machine learning processes such as + ``predict_random_forest()``. + """ + return _process('load_ml_model', id=id) + + +@openeo_process +def load_result(id, spatial_extent=UNSET, temporal_extent=UNSET, bands=UNSET) -> ProcessBuilder: + """ + Load batch job results + + :param id: The id of a batch job with results. + :param spatial_extent: Limits the data to load from the batch job result to the specified bounding box or + polygons. * For raster data, the process loads the pixel into the data cube if the point at the pixel + center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard + by the OGC). * For vector data, the process loads the geometry into the data cube if the geometry is fully + within the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).
+ Empty geometries may only be in the data cube if no spatial extent has been provided. The GeoJSON can be + one of the following feature types: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a + `Polygon` or `MultiPolygon` geometry, or * a `FeatureCollection` containing at least one `Feature` with + `Polygon` or `MultiPolygon` geometries. Set this parameter to `null` to set no limit for the spatial + extent. Be careful with this when loading large datasets! It is recommended to use this parameter instead + of using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data. + :param temporal_extent: Limits the data to load from the batch job result to the specified left-closed + temporal interval. Applies to all temporal dimensions. The interval has to be specified as an array with + exactly two elements: 1. The first element is the start of the temporal interval. The specified instance + in time is **included** in the interval. 2. The second element is the end of the temporal interval. The + specified instance in time is **excluded** from the interval. The specified temporal strings follow [RFC + 3339](https://www.rfc-editor.org/rfc/rfc3339.html). Also supports open intervals by setting one of the + boundaries to `null`, but never both. Set this parameter to `null` to set no limit for the temporal + extent. Be careful with this when loading large datasets! It is recommended to use this parameter instead + of using ``filter_temporal()`` directly after loading unbounded data. + :param bands: Only adds the specified bands into the data cube so that bands that don't match the list of + band names are not available. Applies to all dimensions of type `bands`. Either the unique band name + (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in bands) + can be specified. If the unique band name and the common name conflict, the unique band name has a higher + priority. 
+ The order of the specified array defines the order of the bands in the data cube. If multiple + bands match a common name, all matched bands are included in the original order. It is recommended to use + this parameter instead of using ``filter_bands()`` directly after loading unbounded data. + + :return: A data cube for further processing. + """ + return _process('load_result', id=id, spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=bands) + + +@openeo_process +def load_stac(url, spatial_extent=UNSET, temporal_extent=UNSET, bands=UNSET, properties=UNSET) -> ProcessBuilder: + """ + Loads data from STAC + + :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific + STAC API Collection that allows filtering items and downloading assets. This includes batch job results, + which themselves are compliant with STAC. For external URLs, authentication details such as API keys or tokens + may need to be included in the URL. Batch job results can be specified in two ways: - For batch job + results at the same back-end, a URL pointing to the corresponding batch job results endpoint should be + provided. The URL usually ends with `/jobs/{id}/results` and `{id}` is the corresponding batch job ID. - + For external results, a signed URL must be provided. Not all back-ends support signed URLs, which are + provided as a link with the link relation `canonical` in the batch job result metadata. + :param spatial_extent: Limits the data to load to the specified bounding box or polygons. * For raster + data, the process loads the pixel into the data cube if the point at the pixel center intersects with the + bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). * For vector + data, the process loads the geometry into the data cube if the geometry is fully within the bounding box or + any of the polygons (as defined in the Simple Features standard by the OGC).
Empty geometries may only be + in the data cube if no spatial extent has been provided. The GeoJSON can be one of the following feature + types: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a `Polygon` or `MultiPolygon` + geometry, or * a `FeatureCollection` containing at least one `Feature` with `Polygon` or `MultiPolygon` + geometries. Set this parameter to `null` to set no limit for the spatial extent. Be careful with this when + loading large datasets! It is recommended to use this parameter instead of using ``filter_bbox()`` or + ``filter_spatial()`` directly after loading unbounded data. + :param temporal_extent: Limits the data to load to the specified left-closed temporal interval. Applies to + all temporal dimensions. The interval has to be specified as an array with exactly two elements: 1. The + first element is the start of the temporal interval. The specified instance in time is **included** in the + interval. 2. The second element is the end of the temporal interval. The specified instance in time is + **excluded** from the interval. The second element must always be greater/later than the first element. + Otherwise, a `TemporalExtentEmpty` exception is thrown. Also supports open intervals by setting one of the + boundaries to `null`, but never both. Set this parameter to `null` to set no limit for the temporal + extent. Be careful with this when loading large datasets! It is recommended to use this parameter instead + of using ``filter_temporal()`` directly after loading unbounded data. + :param bands: Only adds the specified bands into the data cube so that bands that don't match the list of + band names are not available. Applies to all dimensions of type `bands`. Either the unique band name + (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in bands) + can be specified. If the unique band name and the common name conflict, the unique band name has a higher + priority. 
The order of the specified array defines the order of the bands in the data cube. If multiple + bands match a common name, all matched bands are included in the original order. It is recommended to use + this parameter instead of using ``filter_bands()`` directly after loading unbounded data. + :param properties: Limits the data by metadata properties to include only data in the data cube which all + given conditions return `true` for (AND operation). Specify key-value-pairs with the key being the name of + the metadata property, which can be retrieved with the openEO Data Discovery for Collections. The value + must be a condition (user-defined process) to be evaluated against a STAC API. This parameter is not + supported for static STAC. + + :return: A data cube for further processing. + """ + return _process('load_stac', url=url, spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=bands, properties=properties) + + +@openeo_process +def load_uploaded_files(paths, format, options=UNSET) -> ProcessBuilder: + """ + Load files from the user workspace + + :param paths: The files to read. Folders can't be specified, specify all files instead. An exception is + thrown if a file can't be read. + :param format: The file format to read from. It must be one of the values that the server reports as + supported input file formats, which usually correspond to the short GDAL/OGR codes. If the format is not + suitable for loading the data, a `FormatUnsuitable` exception will be thrown. This parameter is *case + insensitive*. + :param options: The file format parameters to be used to read the files. Must correspond to the parameters + that the server reports as supported parameters for the chosen `format`. The parameter names and valid + values usually correspond to the GDAL/OGR format options. + + :return: A data cube for further processing. 
+ """ + return _process('load_uploaded_files', paths=paths, format=format, options=options) + + +@openeo_process +def load_url(url, format, options=UNSET) -> ProcessBuilder: + """ + Load data from a URL + + :param url: The URL to read from. Authentication details such as API keys or tokens may need to be included + in the URL. + :param format: The file format to use when loading the data. It must be one of the values that the server + reports as supported input file formats, which usually correspond to the short GDAL/OGR codes. If the + format is not suitable for loading the data, a `FormatUnsuitable` exception will be thrown. This parameter + is *case insensitive*. + :param options: The file format parameters to use when reading the data. Must correspond to the parameters + that the server reports as supported parameters for the chosen `format`. The parameter names and valid + values usually correspond to the GDAL/OGR format options. + + :return: A data cube for further processing. + """ + return _process('load_url', url=url, format=format, options=options) + + +@openeo_process +def log(x, base) -> ProcessBuilder: + """ + Logarithm to a base + + :param x: A number to compute the logarithm for. + :param base: The numerical base. + + :return: The computed logarithm. + """ + return _process('log', x=x, base=base) + + +@openeo_process +def lt(x, y) -> ProcessBuilder: + """ + Less than comparison + + :param x: First operand. + :param y: Second operand. + + :return: `true` if `x` is strictly less than `y`, `null` if any operand is `null`, otherwise `false`. + """ + return _process('lt', x=x, y=y) + + +@openeo_process +def lte(x, y) -> ProcessBuilder: + """ + Less than or equal to comparison + + :param x: First operand. + :param y: Second operand. + + :return: `true` if `x` is less than or equal to `y`, `null` if any operand is `null`, otherwise `false`. 
+ """ + return _process('lte', x=x, y=y) + + +@openeo_process +def mask(data, mask, replacement=UNSET) -> ProcessBuilder: + """ + Apply a raster mask + + :param data: A raster data cube. + :param mask: A mask as a raster data cube. Every pixel in `data` must have a corresponding element in + `mask`. + :param replacement: The value used to replace masked values with. + + :return: A masked raster data cube with the same dimensions. The dimension properties (name, type, labels, + reference system and resolution) remain unchanged. + """ + return _process('mask', data=data, mask=mask, replacement=replacement) + + +@openeo_process +def mask_polygon(data, mask, replacement=UNSET, inside=UNSET) -> ProcessBuilder: + """ + Apply a polygon mask + + :param data: A raster data cube. + :param mask: A GeoJSON object or a vector data cube containing at least one polygon. The provided vector + data can be one of the following: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a `Polygon` + or `MultiPolygon` geometry, or * a `FeatureCollection` containing at least one `Feature` with `Polygon` or + `MultiPolygon` geometries. * Empty geometries are ignored. + :param replacement: The value used to replace masked values with. + :param inside: If set to `true` all pixels for which the point at the pixel center **does** intersect with + any polygon are replaced. + + :return: A masked raster data cube with the same dimensions. The dimension properties (name, type, labels, + reference system and resolution) remain unchanged. + """ + return _process('mask_polygon', data=data, mask=mask, replacement=replacement, inside=inside) + + +@openeo_process +def max(data, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Maximum value + + :param data: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. Setting + this flag to `false` considers no-data values so that `null` is returned if any value is such a value. 
+ + :return: The maximum value. + """ + return _process('max', data=data, ignore_nodata=ignore_nodata) + + +@openeo_process +def mean(data, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Arithmetic mean (average) + + :param data: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. Setting + this flag to `false` considers no-data values so that `null` is returned if any value is such a value. + + :return: The computed arithmetic mean. + """ + return _process('mean', data=data, ignore_nodata=ignore_nodata) + + +@openeo_process +def median(data, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Statistical median + + :param data: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. Setting + this flag to `false` considers no-data values so that `null` is returned if any value is such a value. + + :return: The computed statistical median. + """ + return _process('median', data=data, ignore_nodata=ignore_nodata) + + +@openeo_process +def merge_cubes(cube1, cube2, overlap_resolver=UNSET, context=UNSET) -> ProcessBuilder: + """ + Merge two data cubes + + :param cube1: The base data cube. + :param cube2: The other data cube to be merged with the base data cube. + :param overlap_resolver: A reduction operator that resolves the conflict if the data overlaps. The reducer + must return a value of the same data type as the input values are. The reduction operator may be a single + process such as ``multiply()`` or consist of multiple sub-processes. `null` (the default) can be specified + if no overlap resolver is required. + :param context: Additional data to be passed to the overlap resolver. + + :return: The merged data cube. See the process description for details regarding the dimensions and + dimension properties (name, type, labels, reference system and resolution). 
+ """ + return _process('merge_cubes', + cube1=cube1, + cube2=cube2, + overlap_resolver=(build_child_callback(overlap_resolver, parent_parameters=['x', 'y', 'context']) if overlap_resolver not in [None, UNSET] else overlap_resolver), + context=context + ) + + +@openeo_process +def min(data, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Minimum value + + :param data: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. Setting + this flag to `false` considers no-data values so that `null` is returned if any value is such a value. + + :return: The minimum value. + """ + return _process('min', data=data, ignore_nodata=ignore_nodata) + + +@openeo_process +def mod(x, y) -> ProcessBuilder: + """ + Modulo + + :param x: A number to be used as the dividend. + :param y: A number to be used as the divisor. + + :return: The remainder after division. + """ + return _process('mod', x=x, y=y) + + +@openeo_process +def multiply(x, y) -> ProcessBuilder: + """ + Multiplication of two numbers + + :param x: The multiplier. + :param y: The multiplicand. + + :return: The computed product of the two numbers. + """ + return _process('multiply', x=x, y=y) + + +@openeo_process +def nan() -> ProcessBuilder: + """ + Not a Number (NaN) + + :return: Returns `NaN`. + """ + return _process('nan', ) + + +@openeo_process +def ndvi(data, nir=UNSET, red=UNSET, target_band=UNSET) -> ProcessBuilder: + """ + Normalized Difference Vegetation Index + + :param data: A raster data cube with two bands that have the common names `red` and `nir` assigned. + :param nir: The name of the NIR band. Defaults to the band that has the common name `nir` assigned. Either + the unique band name (metadata field `name` in bands) or one of the common band names (metadata field + `common_name` in bands) can be specified. If the unique band name and the common name conflict, the unique + band name has a higher priority. 
+ :param red: The name of the red band. Defaults to the band that has the common name `red` assigned. Either + the unique band name (metadata field `name` in bands) or one of the common band names (metadata field + `common_name` in bands) can be specified. If the unique band name and the common name conflict, the unique + band name has a higher priority. + :param target_band: By default, the dimension of type `bands` is dropped. To keep the dimension specify a + new band name in this parameter so that a new dimension label with the specified name will be added for the + computed values. + + :return: A raster data cube containing the computed NDVI values. The structure of the data cube differs + depending on the value passed to `target_band`: * `target_band` is `null`: The data cube does not contain + the dimension of type `bands`, the number of dimensions decreases by one. The dimension properties (name, + type, labels, reference system and resolution) for all other dimensions remain unchanged. * `target_band` + is a string: The data cube keeps the same dimensions. The dimension properties remain unchanged, but the + number of dimension labels for the dimension of type `bands` increases by one. The additional label is + named as specified in `target_band`. + """ + return _process('ndvi', data=data, nir=nir, red=red, target_band=target_band) + + +@openeo_process +def neq(x, y, delta=UNSET, case_sensitive=UNSET) -> ProcessBuilder: + """ + Not equal to comparison + + :param x: First operand. + :param y: Second operand. + :param delta: Only applicable for comparing two numbers. If this optional parameter is set to a positive + non-zero number the non-equality of two numbers is checked against a delta value. This is especially useful + to circumvent problems with floating-point inaccuracy in machine-based computation. 
+ This option is + basically an alias for the following computation: `gt(abs(minus([x, y])), delta)` + :param case_sensitive: Only applicable for comparing two strings. Case sensitive comparison can be disabled + by setting this parameter to `false`. + + :return: `true` if `x` is *not* equal to `y`, `null` if any operand is `null`, otherwise `false`. + """ + return _process('neq', x=x, y=y, delta=delta, case_sensitive=case_sensitive) + + +@openeo_process +def normalized_difference(x, y) -> ProcessBuilder: + """ + Normalized difference + + :param x: The value for the first band. + :param y: The value for the second band. + + :return: The computed normalized difference. + """ + return _process('normalized_difference', x=x, y=y) + + +@openeo_process +def not_(x) -> ProcessBuilder: + """ + Inverting a boolean + + :param x: Boolean value to invert. + + :return: Inverted boolean value. + """ + return _process('not', x=x) + + +@openeo_process +def or_(x, y) -> ProcessBuilder: + """ + Logical OR + + :param x: A boolean value. + :param y: A boolean value. + + :return: Boolean result of the logical OR. + """ + return _process('or', x=x, y=y) + + +@openeo_process +def order(data, asc=UNSET, nodata=UNSET) -> ProcessBuilder: + """ + Get the order of array elements + + :param data: An array to compute the order for. + :param asc: The default sort order is ascending, with smallest values first. To sort in reverse + (descending) order, set this parameter to `false`. + :param nodata: Controls the handling of no-data values (`null`). By default, they are removed. If set to + `true`, missing values in the data are put last; if set to `false`, they are put first. + + :return: The computed permutation. + """ + return _process('order', data=data, asc=asc, nodata=nodata) + + +@openeo_process +def pi() -> ProcessBuilder: + """ + Pi (π) + + :return: The numerical value of Pi.
+ """ + return _process('pi', ) + + +@openeo_process +def power(base, p) -> ProcessBuilder: + """ + Exponentiation + + :param base: The numerical base. + :param p: The numerical exponent. + + :return: The computed value for `base` raised to the power of `p`. + """ + return _process('power', base=base, p=p) + + +@openeo_process +def predict_curve(parameters, function, dimension, labels=UNSET) -> ProcessBuilder: + """ + Predict values + + :param parameters: A data cube with optimal values, e.g. computed by the process ``fit_curve()``. + :param function: The model function. It must take the parameters to fit as array through the first argument + and the independent variable `x` as the second argument. It is recommended to store the model function as + a user-defined process on the back-end. + :param dimension: The name of the dimension for predictions. + :param labels: The labels to predict values for. If no labels are given, predicts values only for no-data + (`null`) values in the data cube. + + :return: A data cube with the predicted values with the provided dimension `dimension` having as many + labels as provided through `labels`. + """ + return _process('predict_curve', + parameters=parameters, + function=build_child_callback(function, parent_parameters=['x', 'parameters']), + dimension=dimension, + labels=labels + ) + + +@openeo_process +def predict_random_forest(data, model) -> ProcessBuilder: + """ + Predict values based on a Random Forest model + + :param data: An array of numbers. + :param model: A model object that can be trained with the processes ``fit_regr_random_forest()`` + (regression) and ``fit_class_random_forest()`` (classification). + + :return: The predicted value. Returns `null` if any of the given values in the array is a no-data value. 
+ """ + return _process('predict_random_forest', data=data, model=model) + + +@openeo_process +def product(data, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Compute the product by multiplying numbers + + :param data: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. Setting + this flag to `false` considers no-data values so that `null` is returned if any value is such a value. + + :return: The computed product of the sequence of numbers. + """ + return _process('product', data=data, ignore_nodata=ignore_nodata) + + +@openeo_process +def quantiles(data, probabilities=UNSET, q=UNSET, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Quantiles + + :param data: An array of numbers. + :param probabilities: Quantiles to calculate. Either a list of probabilities or the number of intervals: * + Provide an array with a sorted list of probabilities in ascending order to calculate quantiles for. The + probabilities must be between 0 and 1 (inclusive). If not sorted in ascending order, an + `AscendingProbabilitiesRequired` exception is thrown. * Provide an integer to specify the number of + intervals to calculate quantiles for. Calculates q-quantiles with equal-sized intervals. + :param q: Number of intervals to calculate quantiles for. Calculates q-quantiles with equal-sized + intervals. This parameter has been **deprecated**. Please use the parameter `probabilities` instead. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. Setting + this flag to `false` considers no-data values so that an array with `null` values is returned if any + element is such a value. + + :return: An array with the computed quantiles. The list has either * as many elements as the given list of + `probabilities` had or * *`q`-1* elements. If the input array is empty the resulting array is filled with + as many `null` values as required according to the list above. 
See the 'Empty array' example for an + example. + """ + return _process('quantiles', data=data, probabilities=probabilities, q=q, ignore_nodata=ignore_nodata) + + +@openeo_process +def rearrange(data, order) -> ProcessBuilder: + """ + Sort an array based on a permutation + + :param data: The array to rearrange. + :param order: The permutation used for rearranging. + + :return: The rearranged array. + """ + return _process('rearrange', data=data, order=order) + + +@openeo_process +def reduce_dimension(data, reducer, dimension, context=UNSET) -> ProcessBuilder: + """ + Reduce dimensions + + :param data: A data cube. + :param reducer: A reducer to apply on the specified dimension. A reducer is a single process such as + ``mean()`` or a set of processes, which computes a single value for a list of values, see the category + 'reducer' for such processes. + :param dimension: The name of the dimension over which to reduce. Fails with a `DimensionNotAvailable` + exception if the specified dimension does not exist. + :param context: Additional data to be passed to the reducer. + + :return: A data cube with the newly computed values. It is missing the given dimension, the number of + dimensions decreases by one. The dimension properties (name, type, labels, reference system and resolution) + for all other dimensions remain unchanged. + """ + return _process('reduce_dimension', + data=data, + reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), + dimension=dimension, + context=context + ) + + +@openeo_process +def reduce_spatial(data, reducer, context=UNSET) -> ProcessBuilder: + """ + Reduce spatial dimensions 'x' and 'y' + + :param data: A raster data cube. + :param reducer: A reducer to apply on the horizontal spatial dimensions. A reducer is a single process such + as ``mean()`` or a set of processes, which computes a single value for a list of values, see the category + 'reducer' for such processes. 
+ :param context: Additional data to be passed to the reducer. + + :return: A data cube with the newly computed values. It is missing the horizontal spatial dimensions, the + number of dimensions decreases by two. The dimension properties (name, type, labels, reference system and + resolution) for all other dimensions remain unchanged. + """ + return _process('reduce_spatial', data=data, reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), context=context) + + +@openeo_process +def rename_dimension(data, source, target) -> ProcessBuilder: + """ + Rename a dimension + + :param data: The data cube. + :param source: The current name of the dimension. Fails with a `DimensionNotAvailable` exception if the + specified dimension does not exist. + :param target: A new Name for the dimension. Fails with a `DimensionExists` exception if a dimension with + the specified name exists. + + :return: A data cube with the same dimensions, but the name of one of the dimensions changes. The old name + can not be referred to any longer. The dimension properties (name, type, labels, reference system and + resolution) remain unchanged. + """ + return _process('rename_dimension', data=data, source=source, target=target) + + +@openeo_process +def rename_labels(data, dimension, target, source=UNSET) -> ProcessBuilder: + """ + Rename dimension labels + + :param data: The data cube. + :param dimension: The name of the dimension to rename the labels for. + :param target: The new names for the labels. If a target dimension label already exists in the data cube, + a `LabelExists` exception is thrown. + :param source: The original names of the labels to be renamed to corresponding array elements in the + parameter `target`. It is allowed to only specify a subset of labels to rename, as long as the `target` and + `source` parameter have the same length. The order of the labels doesn't need to match the order of the + dimension labels in the data cube. 
By default, the array is empty so that the dimension labels in the data + cube are expected to be enumerated. If the dimension labels are not enumerated and the given array is + empty, the `LabelsNotEnumerated` exception is thrown. If one of the source dimension labels doesn't exist, + the `LabelNotAvailable` exception is thrown. + + :return: The data cube with the same dimensions. The dimension properties (name, type, labels, reference + system and resolution) remain unchanged, except that for the given dimension the labels change. The old + labels can not be referred to any longer. The number of labels remains the same. + """ + return _process('rename_labels', data=data, dimension=dimension, target=target, source=source) + + +@openeo_process +def resample_cube_spatial(data, target, method=UNSET) -> ProcessBuilder: + """ + Resample the spatial dimensions to match a target data cube + + :param data: A raster data cube. + :param target: A raster data cube that describes the spatial target resolution. + :param method: Resampling method to use. 
The following options are available and are meant to align with + [`gdalwarp`](https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r): * `average`: average (mean) + resampling, computes the weighted average of all valid pixels * `bilinear`: bilinear resampling * `cubic`: + cubic resampling * `cubicspline`: cubic spline resampling * `lanczos`: Lanczos windowed sinc resampling * + `max`: maximum resampling, selects the maximum value from all valid pixels * `med`: median resampling, + selects the median value of all valid pixels * `min`: minimum resampling, selects the minimum value from + all valid pixels * `mode`: mode resampling, selects the value which appears most often of all the sampled + points * `near`: nearest neighbour resampling (default) * `q1`: first quartile resampling, selects the + first quartile value of all valid pixels * `q3`: third quartile resampling, selects the third quartile + value of all valid pixels * `rms` root mean square (quadratic mean) of all valid pixels * `sum`: compute + the weighted sum of all valid pixels Valid pixels are determined based on the function ``is_valid()``. + + :return: A raster data cube with the same dimensions. The dimension properties (name, type, labels, + reference system and resolution) remain unchanged, except for the resolution and dimension labels of the + spatial dimensions. + """ + return _process('resample_cube_spatial', data=data, target=target, method=method) + + +@openeo_process +def resample_cube_temporal(data, target, dimension=UNSET, valid_within=UNSET) -> ProcessBuilder: + """ + Resample temporal dimensions to match a target data cube + + :param data: A data cube with one or more temporal dimensions. + :param target: A data cube that describes the temporal target resolution. + :param dimension: The name of the temporal dimension to resample, which must exist with this name in both + data cubes. 
If the dimension is not set or is set to `null`, the process resamples all temporal dimensions + that exist with the same names in both data cubes. The following exceptions may occur: * A dimension is + given, but it does not exist in any of the data cubes: `DimensionNotAvailable` * A dimension is given, but + one of them is not temporal: `DimensionMismatch` * No specific dimension name is given and there are no + temporal dimensions with the same name in the data: `DimensionMismatch` + :param valid_within: Setting this parameter to a numerical value enables that the process searches for + valid values within the given period of days before and after the target timestamps. Valid values are + determined based on the function ``is_valid()``. For example, the limit of `7` for the target timestamps + `2020-01-15 12:00:00` looks for a nearest neighbor after `2020-01-08 12:00:00` and before `2020-01-22 + 12:00:00`. If no valid value is found within the given period, the value will be set to no-data (`null`). + + :return: A data cube with the same dimensions and the same dimension properties (name, type, labels, + reference system and resolution) for all non-temporal dimensions. For the temporal dimension, the name and + type remain unchanged, but the dimension labels, resolution and reference system may change. + """ + return _process('resample_cube_temporal', data=data, target=target, dimension=dimension, valid_within=valid_within) + + +@openeo_process +def resample_spatial(data, resolution=UNSET, projection=UNSET, method=UNSET, align=UNSET) -> ProcessBuilder: + """ + Resample and warp the spatial dimensions + + :param data: A raster data cube. + :param resolution: Resamples the data cube to the target resolution, which can be specified either as + separate values for x and y or as a single value for both axes. Specified in the units of the target + projection. Doesn't change the resolution by default (`0`). 
+ :param projection: Warps the data cube to the target projection, specified as as [EPSG + code](http://www.epsg-registry.org/) or [WKT2 CRS + string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). By default (`null`), the projection is + not changed. + :param method: Resampling method to use. The following options are available and are meant to align with + [`gdalwarp`](https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r): * `average`: average (mean) + resampling, computes the weighted average of all valid pixels * `bilinear`: bilinear resampling * `cubic`: + cubic resampling * `cubicspline`: cubic spline resampling * `lanczos`: Lanczos windowed sinc resampling * + `max`: maximum resampling, selects the maximum value from all valid pixels * `med`: median resampling, + selects the median value of all valid pixels * `min`: minimum resampling, selects the minimum value from + all valid pixels * `mode`: mode resampling, selects the value which appears most often of all the sampled + points * `near`: nearest neighbour resampling (default) * `q1`: first quartile resampling, selects the + first quartile value of all valid pixels * `q3`: third quartile resampling, selects the third quartile + value of all valid pixels * `rms` root mean square (quadratic mean) of all valid pixels * `sum`: compute + the weighted sum of all valid pixels Valid pixels are determined based on the function ``is_valid()``. + :param align: Specifies to which corner of the spatial extent the new resampled data is aligned to. + + :return: A raster data cube with values warped onto the new projection. It has the same dimensions and the + same dimension properties (name, type, labels, reference system and resolution) for all non-spatial or + vertical spatial dimensions. For the horizontal spatial dimensions the name and type remain unchanged, but + reference system, labels and resolution may change depending on the given parameters. 
+ """ + return _process('resample_spatial', data=data, resolution=resolution, projection=projection, method=method, align=align) + + +@openeo_process +def round(x, p=UNSET) -> ProcessBuilder: + """ + Round to a specified precision + + :param x: A number to round. + :param p: A positive number specifies the number of digits after the decimal point to round to. A negative + number means rounding to a power of ten, so for example *-2* rounds to the nearest hundred. Defaults to + *0*. + + :return: The rounded number. + """ + return _process('round', x=x, p=p) + + +@openeo_process +def run_udf(data, udf, runtime, version=UNSET, context=UNSET) -> ProcessBuilder: + """ + Run a UDF + + :param data: The data to be passed to the UDF. + :param udf: Either source code, an absolute URL or a path to a UDF script. + :param runtime: A UDF runtime identifier available at the back-end. + :param version: An UDF runtime version. If set to `null`, the default runtime version specified for each + runtime is used. + :param context: Additional data such as configuration options to be passed to the UDF. + + :return: The data processed by the UDF. The returned value can be of any data type and is exactly what the + UDF code returns. + """ + return _process('run_udf', data=data, udf=udf, runtime=runtime, version=version, context=context) + + +@openeo_process +def run_udf_externally(data, url, context=UNSET) -> ProcessBuilder: + """ + Run an externally hosted UDF container + + :param data: The data to be passed to the UDF. + :param url: Absolute URL to a remote UDF service. + :param context: Additional data such as configuration options to be passed to the UDF. + + :return: The data processed by the UDF. The returned value can in principle be of any data type, but it + depends on what is returned by the UDF code. Please see the implemented UDF interface for details. 
+ """ + return _process('run_udf_externally', data=data, url=url, context=context) + + +@openeo_process +def sar_backscatter(data, coefficient=UNSET, elevation_model=UNSET, mask=UNSET, contributing_area=UNSET, local_incidence_angle=UNSET, ellipsoid_incidence_angle=UNSET, noise_removal=UNSET, options=UNSET) -> ProcessBuilder: + """ + Computes backscatter from SAR input + + :param data: The source data cube containing SAR input. + :param coefficient: Select the radiometric correction coefficient. The following options are available: * + `beta0`: radar brightness * `sigma0-ellipsoid`: ground area computed with ellipsoid earth model * + `sigma0-terrain`: ground area computed with terrain earth model * `gamma0-ellipsoid`: ground area computed + with ellipsoid earth model in sensor line of sight * `gamma0-terrain`: ground area computed with terrain + earth model in sensor line of sight (default) * `null`: non-normalized backscatter + :param elevation_model: The digital elevation model to use. Set to `null` (the default) to allow the back- + end to choose, which will improve portability, but reduce reproducibility. + :param mask: If set to `true`, a data mask is added to the bands with the name `mask`. It indicates which + values are valid (1), invalid (0) or contain no-data (null). + :param contributing_area: If set to `true`, a DEM-based local contributing area band named + `contributing_area` is added. The values are given in square meters. + :param local_incidence_angle: If set to `true`, a DEM-based local incidence angle band named + `local_incidence_angle` is added. The values are given in degrees. + :param ellipsoid_incidence_angle: If set to `true`, an ellipsoidal incidence angle band named + `ellipsoid_incidence_angle` is added. The values are given in degrees. + :param noise_removal: If set to `false`, no noise removal is applied. Defaults to `true`, which removes + noise. + :param options: Proprietary options for the backscatter computations. 
Specifying proprietary options will + reduce portability. + + :return: Backscatter values corresponding to the chosen parametrization. The values are given in linear + scale. + """ + return _process('sar_backscatter', + data=data, + coefficient=coefficient, + elevation_model=elevation_model, + mask=mask, + contributing_area=contributing_area, + local_incidence_angle=local_incidence_angle, + ellipsoid_incidence_angle=ellipsoid_incidence_angle, + noise_removal=noise_removal, + options=options + ) + + +@openeo_process +def save_result(data, format, options=UNSET) -> ProcessBuilder: + """ + Save processed data + + :param data: The data to deliver in the given file format. + :param format: The file format to use. It must be one of the values that the server reports as supported + output file formats, which usually correspond to the short GDAL/OGR codes. This parameter is *case + insensitive*. * If the data cube is empty and the file format can't store empty data cubes, a + `DataCubeEmpty` exception is thrown. * If the file format is otherwise not suitable for storing the + underlying data structure, a `FormatUnsuitable` exception is thrown. + :param options: The file format parameters to be used to create the file(s). Must correspond to the + parameters that the server reports as supported parameters for the chosen `format`. The parameter names and + valid values usually correspond to the GDAL/OGR format options. + + :return: Always returns `true` as in case of an error an exception is thrown which aborts the execution of + the process. + """ + return _process('save_result', data=data, format=format, options=options) + + +@openeo_process +def sd(data, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Standard deviation + + :param data: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. Setting + this flag to `false` considers no-data values so that `null` is returned if any value is such a value. 
+ + :return: The computed sample standard deviation. + """ + return _process('sd', data=data, ignore_nodata=ignore_nodata) + + +@openeo_process +def sgn(x) -> ProcessBuilder: + """ + Signum + + :param x: A number. + + :return: The computed signum value of `x`. + """ + return _process('sgn', x=x) + + +@openeo_process +def sin(x) -> ProcessBuilder: + """ + Sine + + :param x: An angle in radians. + + :return: The computed sine of `x`. + """ + return _process('sin', x=x) + + +@openeo_process +def sinh(x) -> ProcessBuilder: + """ + Hyperbolic sine + + :param x: An angle in radians. + + :return: The computed hyperbolic sine of `x`. + """ + return _process('sinh', x=x) + + +@openeo_process +def sort(data, asc=UNSET, nodata=UNSET) -> ProcessBuilder: + """ + Sort data + + :param data: An array with data to sort. + :param asc: The default sort order is ascending, with smallest values first. To sort in reverse + (descending) order, set this parameter to `false`. + :param nodata: Controls the handling of no-data values (`null`). By default, they are removed. If set to + `true`, missing values in the data are put last; if set to `false`, they are put first. + + :return: The sorted array. + """ + return _process('sort', data=data, asc=asc, nodata=nodata) + + +@openeo_process +def sqrt(x) -> ProcessBuilder: + """ + Square root + + :param x: A number. + + :return: The computed square root. + """ + return _process('sqrt', x=x) + + +@openeo_process +def subtract(x, y) -> ProcessBuilder: + """ + Subtraction of two numbers + + :param x: The minuend. + :param y: The subtrahend. + + :return: The computed result. + """ + return _process('subtract', x=x, y=y) + + +@openeo_process +def sum(data, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Compute the sum by adding up numbers + + :param data: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. 
Setting + this flag to `false` considers no-data values so that `null` is returned if any value is such a value. + + :return: The computed sum of the sequence of numbers. + """ + return _process('sum', data=data, ignore_nodata=ignore_nodata) + + +@openeo_process +def tan(x) -> ProcessBuilder: + """ + Tangent + + :param x: An angle in radians. + + :return: The computed tangent of `x`. + """ + return _process('tan', x=x) + + +@openeo_process +def tanh(x) -> ProcessBuilder: + """ + Hyperbolic tangent + + :param x: An angle in radians. + + :return: The computed hyperbolic tangent of `x`. + """ + return _process('tanh', x=x) + + +@openeo_process +def text_begins(data, pattern, case_sensitive=UNSET) -> ProcessBuilder: + """ + Text begins with another text + + :param data: Text in which to find something at the beginning. + :param pattern: Text to find at the beginning of `data`. Regular expressions are not supported. + :param case_sensitive: Case sensitive comparison can be disabled by setting this parameter to `false`. + + :return: `true` if `data` begins with `pattern`, false` otherwise. + """ + return _process('text_begins', data=data, pattern=pattern, case_sensitive=case_sensitive) + + +@openeo_process +def text_concat(data, separator=UNSET) -> ProcessBuilder: + """ + Concatenate elements to a single text + + :param data: A set of elements. Numbers, boolean values and null values get converted to their (lower case) + string representation. For example: `1` (integer), `-1.5` (number), `true` / `false` (boolean values) + :param separator: A separator to put between each of the individual texts. Defaults to an empty string. + + :return: A string containing a string representation of all the array elements in the same order, with the + separator between each element. 
+ """ + return _process('text_concat', data=data, separator=separator) + + +@openeo_process +def text_contains(data, pattern, case_sensitive=UNSET) -> ProcessBuilder: + """ + Text contains another text + + :param data: Text in which to find something in. + :param pattern: Text to find in `data`. Regular expressions are not supported. + :param case_sensitive: Case sensitive comparison can be disabled by setting this parameter to `false`. + + :return: `true` if `data` contains the `pattern`, false` otherwise. + """ + return _process('text_contains', data=data, pattern=pattern, case_sensitive=case_sensitive) + + +@openeo_process +def text_ends(data, pattern, case_sensitive=UNSET) -> ProcessBuilder: + """ + Text ends with another text + + :param data: Text in which to find something at the end. + :param pattern: Text to find at the end of `data`. Regular expressions are not supported. + :param case_sensitive: Case sensitive comparison can be disabled by setting this parameter to `false`. + + :return: `true` if `data` ends with `pattern`, false` otherwise. + """ + return _process('text_ends', data=data, pattern=pattern, case_sensitive=case_sensitive) + + +@openeo_process +def trim_cube(data) -> ProcessBuilder: + """ + Remove dimension labels with no-data values + + :param data: A data cube to trim. + + :return: A trimmed data cube with the same dimensions. The dimension properties name, type, reference + system and resolution remain unchanged. The number of dimension labels may decrease. + """ + return _process('trim_cube', data=data) + + +@openeo_process +def unflatten_dimension(data, dimension, target_dimensions, label_separator=UNSET) -> ProcessBuilder: + """ + Split a single dimensions into multiple dimensions + + :param data: A data cube that is consistently structured so that operation can execute flawlessly (e.g. the + dimension labels need to contain the `label_separator` exactly 1 time for two target dimensions, 2 times + for three target dimensions etc.). 
+ :param dimension: The name of the dimension to split. + :param target_dimensions: The names of the new target dimensions. New dimensions will be created with the + given names and type `other` (see ``add_dimension()``). Fails with a `TargetDimensionExists` exception if + any of the dimensions exists. The order of the array defines the order in which the dimensions and + dimension labels are added to the data cube (see the example in the process description). + :param label_separator: The string that will be used as a separator to split the dimension labels. + + :return: A data cube with the new shape. The dimension properties (name, type, labels, reference system and + resolution) for all other dimensions remain unchanged. + """ + return _process('unflatten_dimension', data=data, dimension=dimension, target_dimensions=target_dimensions, label_separator=label_separator) + + +@openeo_process +def variance(data, ignore_nodata=UNSET) -> ProcessBuilder: + """ + Variance + + :param data: An array of numbers. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. Setting + this flag to `false` considers no-data values so that `null` is returned if any value is such a value. + + :return: The computed sample variance. + """ + return _process('variance', data=data, ignore_nodata=ignore_nodata) + + +@openeo_process +def vector_buffer(geometries, distance) -> ProcessBuilder: + """ + Buffer geometries by distance + + :param geometries: Geometries to apply the buffer on. Feature properties are preserved. + :param distance: The distance of the buffer in meters. A positive distance expands the geometries, + resulting in outward buffering (dilation), while a negative distance shrinks the geometries, resulting in + inward buffering (erosion). If the unit of the spatial reference system is not meters, a `UnitMismatch` + error is thrown. Use ``vector_reproject()`` to convert the geometries to a suitable spatial reference + system. 
+ + :return: Returns a vector data cube with the computed new geometries of which some may be empty. + """ + return _process('vector_buffer', geometries=geometries, distance=distance) + + +@openeo_process +def vector_reproject(data, projection, dimension=UNSET) -> ProcessBuilder: + """ + Reprojects the geometry dimension + + :param data: A vector data cube. + :param projection: Coordinate reference system to reproject to. Specified as an [EPSG + code](http://www.epsg-registry.org/) or [WKT2 CRS + string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). + :param dimension: The name of the geometry dimension to reproject. If no specific dimension is specified, + the filter applies to all geometry dimensions. Fails with a `DimensionNotAvailable` exception if the + specified dimension does not exist. + + :return: A vector data cube with geometries projected to the new coordinate reference system. The reference + system of the geometry dimension changes, all other dimensions and properties remain unchanged. + """ + return _process('vector_reproject', data=data, projection=projection, dimension=dimension) + + +@openeo_process +def vector_to_random_points(data, geometry_count=UNSET, total_count=UNSET, group=UNSET, seed=UNSET) -> ProcessBuilder: + """ + Sample random points from geometries + + :param data: Input geometries for sample extraction. + :param geometry_count: The maximum number of points to compute per geometry. Points in the input + geometries can be selected only once by the sampling. + :param total_count: The maximum number of points to compute overall. Throws a `CountMismatch` exception if + the specified value is less than the provided number of geometries. + :param group: Specifies whether the sampled points should be grouped by input geometry (default) or be + generated as independent points. * If the sampled points are grouped, the process generates a `MultiPoint` + per geometry given which keeps the original identifier if present. 
* Otherwise, each sampled point is + generated as a distinct `Point` geometry without identifier. + :param seed: A randomization seed to use for random sampling. If not given or `null`, no seed is used and + results may differ on subsequent use. + + :return: Returns a vector data cube with the sampled points. + """ + return _process('vector_to_random_points', data=data, geometry_count=geometry_count, total_count=total_count, group=group, seed=seed) + + +@openeo_process +def vector_to_regular_points(data, distance, group=UNSET) -> ProcessBuilder: + """ + Sample regular points from geometries + + :param data: Input geometries for sample extraction. + :param distance: Defines the minimum distance in meters that is required between two samples generated + *inside* a single geometry. If the unit of the spatial reference system is not meters, a `UnitMismatch` + error is thrown. Use ``vector_reproject()`` to convert the geometries to a suitable spatial reference + system. - For **polygons**, the distance defines the cell sizes of a regular grid that starts at the + upper-left bound of each polygon. The centroid of each cell is then a sample point. If the centroid is not + enclosed in the polygon, no point is sampled. If no point can be sampled for the geometry at all, the first + coordinate of the geometry is returned as point. - For **lines** (line strings), the sampling starts with a + point at the first coordinate of the line and then walks along the line and samples a new point each time + the distance to the previous point has been reached again. - For **points**, the point is returned as + given. + :param group: Specifies whether the sampled points should be grouped by input geometry (default) or be + generated as independent points. * If the sampled points are grouped, the process generates a `MultiPoint` + per geometry given which keeps the original identifier if present. 
* Otherwise, each sampled point is + generated as a distinct `Point` geometry without identifier. + + :return: Returns a vector data cube with the sampled points. + """ + return _process('vector_to_regular_points', data=data, distance=distance, group=group) + + +@openeo_process +def xor(x, y) -> ProcessBuilder: + """ + Logical XOR (exclusive or) + + :param x: A boolean value. + :param y: A boolean value. + + :return: Boolean result of the logical XOR. + """ + return _process('xor', x=x, y=y) diff --git a/lib/openeo/rest/__init__.py b/lib/openeo/rest/__init__.py new file mode 100644 index 000000000..22fbdb71b --- /dev/null +++ b/lib/openeo/rest/__init__.py @@ -0,0 +1,96 @@ +from typing import Optional + +from openeo import BaseOpenEoException + +# TODO: get from config file +DEFAULT_DOWNLOAD_CHUNK_SIZE = 10_000_000 # 10MB + + +class OpenEoClientException(BaseOpenEoException): + """Base class for OpenEO client exceptions""" + pass + + +class CapabilitiesException(OpenEoClientException): + """Back-end does not support certain openEO feature or endpoint.""" + + +class JobFailedException(OpenEoClientException): + """A synchronous batch job failed. This exception references its corresponding job so the client can e.g. + retrieve its logs. + """ + + def __init__(self, message, job): + super().__init__(message) + self.job = job + + +class OperatorException(OpenEoClientException): + """Invalid (mathematical) operator usage.""" + pass + + +class BandMathException(OperatorException): + """Invalid "band math" usage.""" + pass + + +class OpenEoRestError(OpenEoClientException): + pass + + +class OpenEoApiPlainError(OpenEoRestError): + """ + Base class for openEO API error responses, not necessarily following the openEO API specification + (e.g. not properly JSON encoded, missing required fields, ...) 
+ + :param message: the direct error message from the response + :param http_status_code: the HTTP status code of the response + :param error_message: the error message to show when the exception is rendered + (by default a combination of the HTTP status code and the message) + + .. versionadded:: 0.25.0 + """ + + __slots__ = ("http_status_code", "message") + + def __init__( + self, + message: str, + *, + http_status_code: Optional[int] = None, + error_message: Optional[str] = None, + ): + super().__init__(error_message or f"[{http_status_code}] {message}") + self.http_status_code = http_status_code + self.message = message + + +class OpenEoApiError(OpenEoApiPlainError): + """ + Exception for API error responses following the openEO API specification + (https://api.openeo.org/#section/API-Principles/Error-Handling): + JSON-encoded body, some expected fields like "code" and "message", ... + """ + + __slots__ = ("http_status_code", "code", "message", "id", "url") + + def __init__( + self, + *, + http_status_code: int, + code: str, + message: str, + id: Optional[str] = None, + url: Optional[str] = None, + ): + super().__init__( + message=message, + http_status_code=http_status_code, + error_message=f"[{http_status_code}] {code}: {message}" + (f" (ref: {id})" if id else ""), + ) + self.http_status_code = http_status_code + self.code = code + self.message = message + self.id = id + self.url = url diff --git a/lib/openeo/rest/_datacube.py b/lib/openeo/rest/_datacube.py new file mode 100644 index 000000000..79fe5d5ea --- /dev/null +++ b/lib/openeo/rest/_datacube.py @@ -0,0 +1,358 @@ +from __future__ import annotations + +import logging +import pathlib +import re +import typing +import uuid +import warnings +from typing import Dict, List, Optional, Tuple, Union + +import requests + +from openeo.internal.graph_building import FlatGraphableMixin, PGNode, _FromNodeMixin +from openeo.internal.jupyter import render_component +from openeo.internal.processes.builder import ( + 
convert_callable_to_pgnode, + get_parameter_names, +) +from openeo.internal.warnings import UserDeprecationWarning +from openeo.rest import OpenEoClientException +from openeo.util import dict_no_none, str_truncate + +if typing.TYPE_CHECKING: + # Imports for type checking only (circular import issue at runtime). + from openeo.rest.connection import Connection + +log = logging.getLogger(__name__) + +# Sentinel object to refer to "current" cube in chained cube processing expressions. +THIS = object() + + +class _ProcessGraphAbstraction(_FromNodeMixin, FlatGraphableMixin): + """ + Base class for client-side abstractions/wrappers + for structures that are represented by a openEO process graph: + raster data cubes, vector cubes, ML models, ... + """ + + def __init__(self, pgnode: PGNode, connection: Union[Connection, None]): + self._pg = pgnode + # TODO: now that connection can officially be None: + # improve exceptions in cases where is it still assumed to be a real connection (download, create_job, ...) + self._connection = connection + + def __str__(self): + return "{t}({pg})".format(t=self.__class__.__name__, pg=self._pg) + + def flat_graph(self) -> Dict[str, dict]: + """ + Get the process graph in internal flat dict representation. + + .. warning:: This method is mainly intended for internal use. + It is not recommended for general use and is *subject to change*. + + Instead, it is recommended to use + :py:meth:`to_json()` or :py:meth:`print_json()` + to obtain a standardized, interoperable JSON representation of the process graph. + See :ref:`process_graph_export` for more information. + """ + # TODO: wrap in {"process_graph":...} by default/optionally? 
class UDF:
    """
    Helper class to load UDF code (e.g. from file) and embed them as "callback" or child process in a process graph.

    Usage example:

    .. code-block:: python

        udf = UDF.from_file("my-udf-code.py")
        cube = cube.apply(process=udf)


    .. versionchanged:: 0.13.0
        Added auto-detection of ``runtime``.
        Specifying the ``data`` argument is not necessary anymore, and actually deprecated.
        Added :py:meth:`from_file` to simplify loading UDF code from a file.
        See :ref:`old_udf_api` for more background about the changes.
    """

    # TODO: eliminate dependency on `openeo.rest.connection` and move to somewhere under `openeo.internal`?

    __slots__ = ["code", "_runtime", "version", "context", "_source"]

    def __init__(
        self,
        code: str,
        runtime: Optional[str] = None,
        data=None,  # TODO #181 remove `data` argument
        version: Optional[str] = None,
        context: Optional[dict] = None,
        _source=None,
    ):
        """
        Construct a UDF object from given code string and other argument related to the ``run_udf`` process.

        :param code: UDF source code string (Python, R, ...)
        :param runtime: optional UDF runtime identifier, will be autodetected from source code if omitted.
        :param data: unused leftover from old API. Don't use this argument, it will be removed in a future release.
        :param version: optional UDF runtime version string
        :param context: optional additional UDF context data
        :param _source: (for internal use) source identifier
        """
        # TODO: automatically dedent code (when literal string) ?
        self.code = code
        self._runtime = runtime
        self.version = version
        self.context = context
        self._source = _source
        if data is not None:
            # TODO #181 remove `data` argument
            warnings.warn(
                f"The `data` argument of `{self.__class__.__name__}` is deprecated, unused and will be removed in a future release.",
                category=UserDeprecationWarning,
                stacklevel=2,
            )

    def __repr__(self):
        return f"<{type(self).__name__} runtime={self._runtime!r} code={str_truncate(self.code, width=200)!r}>"

    def get_runtime(self, connection: Optional[Connection] = None) -> str:
        """
        Get the UDF runtime: the explicitly given one, or a best-effort guess otherwise.

        :param connection: optional connection, only used when the runtime has to be guessed
        :raises OpenEoClientException: when no runtime was given and guessing fails
        """
        return self._runtime or self._guess_runtime(connection=connection)

    @classmethod
    def from_file(
        cls,
        path: Union[str, pathlib.Path],
        runtime: Optional[str] = None,
        version: Optional[str] = None,
        context: Optional[dict] = None,
    ) -> UDF:
        """
        Load a UDF from a local file.

        .. seealso::
            :py:meth:`from_url` for loading from a URL.

        :param path: path to the local file with UDF source code
        :param runtime: optional UDF runtime identifier, will be auto-detected from source code if omitted.
        :param version: optional UDF runtime version string
        :param context: optional additional UDF context data
        """
        path = pathlib.Path(path)
        code = path.read_text(encoding="utf-8")
        return cls(code=code, runtime=runtime, version=version, context=context, _source=path)

    @classmethod
    def from_url(
        cls,
        url: str,
        runtime: Optional[str] = None,
        version: Optional[str] = None,
        context: Optional[dict] = None,
    ) -> UDF:
        """
        Load a UDF from a URL.

        .. seealso::
            :py:meth:`from_file` for loading from a local file.

        :param url: URL path to load the UDF source code from
        :param runtime: optional UDF runtime identifier, will be auto-detected from source code if omitted.
        :param version: optional UDF runtime version string
        :param context: optional additional UDF context data
        """
        resp = requests.get(url)
        resp.raise_for_status()
        code = resp.text
        return cls(code=code, runtime=runtime, version=version, context=context, _source=url)

    def _guess_runtime(self, connection: Optional[Connection] = None) -> str:
        """
        Guess UDF runtime from UDF source (path or URL) or, failing that, from the source code.

        :param connection: optional connection; when given, the guessed runtime is
            case-normalized against the runtime names listed by ``connection.list_udf_runtimes()``
        :raises OpenEoClientException: when the language can not be detected
        """
        # First, guess UDF language from the file suffix of the source (when known)
        language = None
        if isinstance(self._source, pathlib.Path):
            language = self._guess_runtime_from_suffix(self._source.suffix)
        elif isinstance(self._source, str):
            # The named group "suffix" captures the trailing file extension (e.g. ".py"),
            # optionally followed by a "&..." or "#..." tail.
            url_match = re.match(r"https?://.*?(?P<suffix>\.\w+)([&#].*)?$", self._source)
            if url_match:
                language = self._guess_runtime_from_suffix(url_match.group("suffix"))
        if not language:
            # Guess language from UDF code: a top-level `def name(` is taken as Python
            if re.search(r"^def [\w0-9_]+\(", self.code, flags=re.MULTILINE):
                language = "Python"
            # TODO: detection heuristics for R and other languages?
        if not language:
            raise OpenEoClientException("Failed to detect language of UDF code.")
        runtime = language
        if connection:
            # Some additional best-effort validation/normalization of the runtime
            # TODO: this just does some case-normalization, just drop that all together to eliminate
            #       the dependency on a connection object. See https://github.com/Open-EO/openeo-api/issues/510
            runtimes = {k.lower(): k for k in connection.list_udf_runtimes().keys()}
            runtime = runtimes.get(runtime.lower(), runtime)
        return runtime

    def _guess_runtime_from_suffix(self, suffix: str) -> Optional[str]:
        """
        Map a file suffix (case-insensitive, including leading dot) to a UDF language.

        :return: language name, or ``None`` when the suffix is not recognized
        """
        return {
            ".py": "Python",
            ".r": "R",
        }.get(suffix.lower())

    def get_run_udf_callback(self, connection: Optional[Connection] = None, data_parameter: str = "data") -> PGNode:
        """
        For internal use: construct `run_udf` node to be used as callback in `apply`, `reduce_dimension`, ...

        :param connection: optional connection, passed on for runtime detection
        :param data_parameter: name of the parent parameter to feed to ``run_udf`` as ``data``
        """
        arguments = dict_no_none(
            data={"from_parameter": data_parameter},
            udf=self.code,
            runtime=self.get_runtime(connection=connection),
            version=self.version,
            context=self.context,
        )
        return PGNode(process_id="run_udf", arguments=arguments)
+ # TODO: eliminate need for connection object (also see `UDF._guess_runtime`) + # TODO: when `openeo.rest` deps are gone: move this helper to somewhere under `openeo.internal` + if isinstance(process, PGNode): + # Assume this is already a valid callback process + pg = process + elif isinstance(process, str): + # Assume given reducer is a simple predefined reduce process_id + # TODO: avoid local import (workaround for circular import issue) + import openeo.processes + if process in openeo.processes.__dict__: + process_params = get_parameter_names(openeo.processes.__dict__[process]) + # TODO: switch to "Callable" handling here + else: + # Best effort guess + process_params = parent_parameters + if parent_parameters == ["x", "y"] and (len(process_params) == 1 or process_params[:1] == ["data"]): + # Special case: wrap all parent parameters in an array + arguments = {process_params[0]: [{"from_parameter": p} for p in parent_parameters]} + else: + # Only pass parameters that correspond with an arg name + common = set(process_params).intersection(parent_parameters) + arguments = {p: {"from_parameter": p} for p in common} + pg = PGNode(process_id=process, arguments=arguments) + elif isinstance(process, typing.Callable): + pg = convert_callable_to_pgnode(process, parent_parameters=parent_parameters) + elif isinstance(process, UDF): + pg = process.get_run_udf_callback(connection=connection, data_parameter=parent_parameters[0]) + elif isinstance(process, dict) and isinstance(process.get("process_graph"), PGNode): + pg = process["process_graph"] + else: + raise ValueError(process) + + return PGNode.to_process_graph_argument(pg) + + +def _ensure_save_result( + cube: _ProcessGraphAbstraction, + *, + format: Optional[str] = None, + options: Optional[dict] = None, + weak_format: Optional[str] = None, + default_format: str, + method: str, +) -> _ProcessGraphAbstraction: + """ + Make sure there is a`save_result` node in the process graph. 
+ + :param format: (optional) desired `save_result` file format + :param options: (optional) desired `save_result` file format parameters + :param weak_format: (optional) weak format indicator guessed from file name + :param default_format: default format for data type to use when no format is specified by user + :return: + """ + # TODO #278 instead of standalone helper function, move this to common base class for raster cubes, vector cubes, ... + save_result_nodes = [n for n in cube.result_node().walk_nodes() if n.process_id == "save_result"] + + if not save_result_nodes: + # No `save_result` node yet: automatically add it. + # TODO: the `save_result` method is not defined on _ProcessGraphAbstraction, but it is on DataCube and VectorCube + cube = cube.save_result(format=format or weak_format or default_format, options=options) + elif format or options: + raise OpenEoClientException( + f"{method} with explicit output {'format' if format else 'options'} {format or options!r}," + f" but the process graph already has `save_result` node(s)" + f" which is ambiguous and should not be combined." + ) + + return cube diff --git a/lib/openeo/rest/_testing.py b/lib/openeo/rest/_testing.py new file mode 100644 index 000000000..7940210d6 --- /dev/null +++ b/lib/openeo/rest/_testing.py @@ -0,0 +1,433 @@ +from __future__ import annotations + +import collections +import json +import re +from typing import ( + Callable, + Dict, + Iterable, + Iterator, + Mapping, + Optional, + Sequence, + Tuple, + Union, +) + +from openeo import Connection, DataCube +from openeo.rest.vectorcube import VectorCube + +OPENEO_BACKEND = "https://openeo.test/" + + +class OpeneoTestingException(Exception): + pass + + +class DummyBackend: + """ + Dummy backend that handles sync/batch execution requests + and allows inspection of posted process graphs + """ + + # TODO: move to openeo.testing + # TODO: unify "batch_jobs", "batch_jobs_full" and "extra_job_metadata_fields"? 
+ # TODO: unify "sync_requests" and "sync_requests_full"? + + __slots__ = ( + "_requests_mock", + "connection", + "file_formats", + "sync_requests", + "sync_requests_full", + "batch_jobs", + "batch_jobs_full", + "validation_requests", + "next_result", + "next_validation_errors", + "_forced_job_status", + "job_status_updater", + "job_id_generator", + "extra_job_metadata_fields", + ) + + # Default result (can serve both as JSON or binary data) + DEFAULT_RESULT = b'{"what?": "Result data"}' + + def __init__( + self, + requests_mock, + connection: Connection, + ): + self._requests_mock = requests_mock + self.connection = connection + self.file_formats = {"input": {}, "output": {}} + self.sync_requests = [] + self.sync_requests_full = [] + self.batch_jobs = {} + self.batch_jobs_full = {} + self.validation_requests = [] + self.next_result = self.DEFAULT_RESULT + self.next_validation_errors = [] + self.extra_job_metadata_fields = [] + self._forced_job_status: Dict[str, str] = {} + + # Job status update hook: + # callable that is called on starting a job, and getting job metadata + # allows to dynamically change how the status of a job evolves + # By default: immediately set to "finished" once job is started + self.job_status_updater = lambda job_id, current_status: "finished" + + # Optional job id generator hook: + # callable that generates a job id, e.g. based on the process graph. + # When set to None, or the callable returns None, or it returns an existing job id: + # things fall back to auto-increment job ids ("job-000", "job-001", "job-002", ...) 
+ self.job_id_generator: Optional[Callable[[dict], str]] = None + + requests_mock.post( + connection.build_url("/result"), + content=self._handle_post_result, + ) + requests_mock.post( + connection.build_url("/jobs"), + content=self._handle_post_jobs, + ) + requests_mock.post( + re.compile(connection.build_url(r"/jobs/(job-\d+)/results$")), content=self._handle_post_job_results + ) + requests_mock.get(re.compile(connection.build_url(r"/jobs/(job-\d+)$")), json=self._handle_get_job) + requests_mock.get( + re.compile(connection.build_url(r"/jobs/(job-\d+)/results$")), json=self._handle_get_job_results + ) + requests_mock.delete( + re.compile(connection.build_url(r"/jobs/(job-\d+)/results$")), json=self._handle_delete_job_results + ) + requests_mock.get( + re.compile(connection.build_url("/jobs/(.*?)/results/result.data$")), + content=self._handle_get_job_result_asset, + ) + requests_mock.get( + re.compile(connection.build_url(r"/jobs/(.*?)/logs($|\?.*)")), + # TODO: need to fine-tune dummy logs? 
+ json={"logs": [], "links": []}, + ) + requests_mock.post(connection.build_url("/validation"), json=self._handle_post_validation) + + @classmethod + def at_url(cls, root_url: str, *, requests_mock, capabilities: Optional[dict] = None) -> DummyBackend: + """ + Factory to build dummy backend from given root URL + including creation of connection and mocking of capabilities doc + """ + root_url = root_url.rstrip("/") + "/" + requests_mock.get(root_url, json=build_capabilities(**(capabilities or {}))) + connection = Connection(root_url) + return cls(requests_mock=requests_mock, connection=connection) + + def setup_collection( + self, + collection_id: str, + *, + temporal: Union[bool, Tuple[str, str]] = True, + bands: Sequence[str] = ("B1", "B2", "B3"), + ): + # TODO: also mock `/collections` overview + # TODO: option to override cube_dimensions as a whole, or override dimension names + cube_dimensions = { + "x": {"type": "spatial"}, + "y": {"type": "spatial"}, + } + + if temporal: + cube_dimensions["t"] = { + "type": "temporal", + "extent": temporal if isinstance(temporal, tuple) else [None, None], + } + if bands: + cube_dimensions["bands"] = {"type": "bands", "values": list(bands)} + + self._requests_mock.get( + self.connection.build_url(f"/collections/{collection_id}"), + # TODO: add more metadata? 
+ json={ + "id": collection_id, + # define temporal and band dim + "cube:dimensions": {"t": {"type": "temporal"}, "bands": {"type": "bands"}}, + }, + ) + return self + + def setup_file_format(self, name: str, type: str = "output", gis_data_types: Iterable[str] = ("raster",)): + self.file_formats[type][name] = { + "title": name, + "gis_data_types": list(gis_data_types), + "parameters": {}, + } + self._requests_mock.get(self.connection.build_url("/file_formats"), json=self.file_formats) + return self + + def _handle_post_result(self, request, context): + """handler of `POST /result` (synchronous execute)""" + post_data = request.json() + pg = post_data["process"]["process_graph"] + self.sync_requests_full.append(post_data) + self.sync_requests.append(pg) + result = self.next_result + if isinstance(result, (dict, list)): + result = json.dumps(result).encode("utf-8") + elif isinstance(result, str): + result = result.encode("utf-8") + assert isinstance(result, bytes) + return result + + def _handle_post_jobs(self, request, context): + """handler of `POST /jobs` (create batch job)""" + post_data = request.json() + pg = post_data["process"]["process_graph"] + + # Generate (new) job id + job_id = self.job_id_generator and self.job_id_generator(process_graph=pg) + if not job_id or job_id in self.batch_jobs: + # As fallback: use auto-increment job ids ("job-000", "job-001", "job-002", ...) 
+ job_id = f"job-{len(self.batch_jobs):03d}" + assert job_id not in self.batch_jobs + + # Full post data dump + self.batch_jobs_full[job_id] = post_data + + # Batch job essentials + job_data = {"job_id": job_id, "pg": pg, "status": "created"} + for field in ["title", "description"]: + if field in post_data: + job_data[field] = post_data[field] + for field in self.extra_job_metadata_fields: + job_data[field] = post_data.get(field) + self.batch_jobs[job_id] = job_data + context.status_code = 201 + context.headers["openeo-identifier"] = job_id + + def _get_job_id(self, request) -> str: + match = re.match(r"^/jobs/(job-\d+)(/|$)", request.path) + if not match: + raise OpeneoTestingException(f"Failed to extract job_id from {request.path}") + job_id = match.group(1) + assert job_id in self.batch_jobs + return job_id + + def _get_job_status(self, job_id: str, current_status: str) -> str: + if job_id in self._forced_job_status: + return self._forced_job_status[job_id] + return self.job_status_updater(job_id=job_id, current_status=current_status) + + def _handle_post_job_results(self, request, context): + """Handler of `POST /job/{job_id}/results` (start batch job).""" + job_id = self._get_job_id(request) + assert self.batch_jobs[job_id]["status"] == "created" + self.batch_jobs[job_id]["status"] = self._get_job_status( + job_id=job_id, current_status=self.batch_jobs[job_id]["status"] + ) + context.status_code = 202 + + def _handle_get_job(self, request, context): + """Handler of `GET /job/{job_id}` (get batch job status and metadata).""" + job_id = self._get_job_id(request) + # Allow updating status with `job_status_setter` once job got past status "created" + if self.batch_jobs[job_id]["status"] != "created": + self.batch_jobs[job_id]["status"] = self._get_job_status( + job_id=job_id, current_status=self.batch_jobs[job_id]["status"] + ) + result = { + # TODO: add some more required fields like "process" and "created"? 
+ "id": job_id, + "status": self.batch_jobs[job_id]["status"], + } + if self.batch_jobs[job_id]["status"] == "finished": # HACK some realistic values for a small job + result["costs"] = 123 + result["usage"] = { + "cpu": {"unit": "cpu-seconds", "value": 1234.5}, + "memory": {"unit": "mb-seconds", "value": 34567.89}, + "duration": {"unit": "seconds", "value": 2345}, + } + return result + + def _handle_get_job_results(self, request, context): + """Handler of `GET /job/{job_id}/results` (list batch job results).""" + job_id = self._get_job_id(request) + assert self.batch_jobs[job_id]["status"] == "finished" + return { + "id": job_id, + "assets": {"result.data": {"href": self.connection.build_url(f"/jobs/{job_id}/results/result.data")}}, + } + + def _handle_delete_job_results(self, request, context): + """Handler of `DELETE /job/{job_id}/results` (cancel job).""" + job_id = self._get_job_id(request) + self.batch_jobs[job_id]["status"] = "canceled" + self._forced_job_status[job_id] = "canceled" + context.status_code = 204 + + def _handle_get_job_result_asset(self, request, context): + """Handler of `GET /job/{job_id}/results/result.data` (get batch job result asset).""" + job_id = self._get_job_id(request) + assert self.batch_jobs[job_id]["status"] == "finished" + return self.next_result + + def _handle_post_validation(self, request, context): + """Handler of `POST /validation` (validate process graph).""" + pg = request.json()["process_graph"] + self.validation_requests.append(pg) + return {"errors": self.next_validation_errors} + + def get_sync_pg(self) -> dict: + """Get one and only synchronous process graph""" + assert len(self.sync_requests) == 1 + return self.sync_requests[0] + + def get_sync_post_data(self) -> dict: + """Get post data of the one and only synchronous job.""" + assert len(self.sync_requests_full) == 1 + return self.sync_requests_full[0] + + def get_batch_pg(self) -> dict: + """ + Get process graph of the one and only batch job. 
+ Fails when there is none or more than one. + """ + assert len(self.batch_jobs) == 1 + return self.batch_jobs[max(self.batch_jobs.keys())]["pg"] + + def get_batch_post_data(self) -> dict: + """ + Get post data of the one and only batch job. + Fails when there is none or more than one. + """ + assert len(self.batch_jobs_full) == 1 + return self.batch_jobs_full[max(self.batch_jobs_full.keys())] + + def get_validation_pg(self) -> dict: + """ + Get process graph of the one and only validation request. + """ + assert len(self.validation_requests) == 1 + return self.validation_requests[0] + + def get_pg(self, process_id: Optional[str] = None) -> dict: + """ + Get one and only batch process graph (sync or batch) + + :param process_id: just return single process graph node with this process_id + :return: process graph (flat graph representation) or process graph node + """ + pgs = self.sync_requests + [b["pg"] for b in self.batch_jobs.values()] + if len(pgs) != 1: + raise OpeneoTestingException(f"Expected single process graph, but collected {len(pgs)}") + pg = pgs[0] + if process_id: + # Just return single node (by process_id) + found = [node for node in pg.values() if node.get("process_id") == process_id] + if len(found) != 1: + raise OpeneoTestingException( + f"Expected single process graph node with process_id {process_id!r}, but found {len(found)}: {found}" + ) + return found[0] + return pg + + def execute(self, cube: Union[DataCube, VectorCube], process_id: Optional[str] = None) -> dict: + """ + Execute given cube (synchronously) and return observed process graph (or subset thereof). 
def build_capabilities(
    *,
    api_version: str = "1.0.0",
    stac_version: str = "0.9.0",
    basic_auth: bool = True,
    oidc_auth: bool = True,
    collections: bool = True,
    processes: bool = True,
    sync_processing: bool = True,
    validation: bool = False,
    batch_jobs: bool = True,
    udp: bool = False,
) -> dict:
    """
    Build a dummy capabilities document for testing purposes.

    :param api_version: value for the "api_version" field
    :param stac_version: value for the "stac_version" field
    :param basic_auth: list the basic auth credentials endpoint
    :param oidc_auth: list the OIDC credentials endpoint
    :param collections: list the collection endpoints
    :param processes: list the process listing endpoint
    :param sync_processing: list the synchronous processing endpoint (``POST /result``)
    :param validation: list the validation endpoint
    :param batch_jobs: list the batch job endpoints
    :param udp: list the user-defined process endpoints (``/process_graphs``)
    :return: capabilities document (dict)
    """

    endpoints = []
    if basic_auth:
        endpoints.append({"path": "/credentials/basic", "methods": ["GET"]})
    if oidc_auth:
        endpoints.append({"path": "/credentials/oidc", "methods": ["GET"]})
    if basic_auth or oidc_auth:
        # User info endpoint is listed as soon as any authentication method is listed
        endpoints.append({"path": "/me", "methods": ["GET"]})

    if collections:
        endpoints.append({"path": "/collections", "methods": ["GET"]})
        endpoints.append({"path": "/collections/{collection_id}", "methods": ["GET"]})
    if processes:
        endpoints.append({"path": "/processes", "methods": ["GET"]})
    if sync_processing:
        endpoints.append({"path": "/result", "methods": ["POST"]})
    if validation:
        endpoints.append({"path": "/validation", "methods": ["POST"]})
    if batch_jobs:
        endpoints.extend(
            [
                {"path": "/jobs", "methods": ["GET", "POST"]},
                {"path": "/jobs/{job_id}", "methods": ["GET", "DELETE"]},
                {"path": "/jobs/{job_id}/results", "methods": ["GET", "POST", "DELETE"]},
                {"path": "/jobs/{job_id}/logs", "methods": ["GET"]},
            ]
        )
    if udp:
        endpoints.extend(
            [
                {"path": "/process_graphs", "methods": ["GET"]},
                # Path template must have balanced braces, like the other templated paths above
                {"path": "/process_graphs/{process_graph_id}", "methods": ["GET", "PUT", "DELETE"]},
            ]
        )

    capabilities = {
        "api_version": api_version,
        "stac_version": stac_version,
        "id": "dummy",
        "title": "Dummy openEO back-end",
        "description": "Dummy openeEO back-end",
        "endpoints": endpoints,
        "links": [],
    }
    return capabilities
class BearerAuth(OpenEoApiAuthBase):
    """
    Auth handler that authenticates requests through a bearer token,
    set as ``Authorization`` request header.

    https://open-eo.github.io/openeo-api/apireference/#section/Authentication/Bearer
    """

    def __init__(self, bearer: str):
        # Token to put after the "Bearer " prefix in the `Authorization` header
        self.bearer = bearer

    def __call__(self, req: Request) -> Request:
        """Set the bearer ``Authorization`` header on given request and return that request."""
        req.headers["Authorization"] = f"Bearer {self.bearer}"
        return req
def main(argv=None):
    """
    Entry point of the command line tool: parse the arguments and run the selected sub-command.

    Each sub-command stores its handler function as ``func`` default,
    which is called with the parsed arguments.
    Without sub-command, the general help is printed.

    :param argv: (optional) argument list to parse, passed as-is to ``ArgumentParser.parse_args``
    """
    cli = argparse.ArgumentParser(description="Tool to manage openEO related authentication and configuration.")
    cli.add_argument(
        "--verbose", "-v", action="count", default=0, help="Increase logging verbosity. Can be given multiple times."
    )
    commands = cli.add_subparsers(title="Subcommands", dest="subparser_name")

    def add_command(name, handler, **parser_kwargs):
        # Register sub-command `name` and couple its handler function to it
        command = commands.add_parser(name, **parser_kwargs)
        command.set_defaults(func=handler)
        return command

    # Command: paths
    add_command("paths", main_paths, help="Show paths to config/token files.")

    # Command: config-dump
    config_dump = add_command("config-dump", main_config_dump, help="Dump config file.", aliases=["config"])
    config_dump.add_argument("--show-secrets", action="store_true", help="Don't redact secrets in the dump.")

    # Command: token-dump
    token_dump = add_command(
        "token-dump", main_token_dump, help="Dump OpenID Connect refresh tokens file.", aliases=["tokens"]
    )
    token_dump.add_argument("--show-secrets", action="store_true", help="Don't redact secrets in the dump.")

    # Command: token-clear
    token_clear = add_command("token-clear", main_token_clear, help="Remove OpenID Connect refresh tokens file.")
    token_clear.add_argument("--force", "-f", action="store_true", help="Remove without asking confirmation.")

    # Command: add-basic
    add_basic = add_command("add-basic", main_add_basic, help="Add or update config entry for basic auth.")
    add_basic.add_argument("backend", help="OpenEO Backend URL.")
    add_basic.add_argument("--username", help="Basic auth username.")
    add_basic.add_argument(
        "--no-try",
        dest="try_auth",
        action="store_false",
        help="Don't try out the credentials against the backend, just store them.",
    )

    # Command: add-oidc
    add_oidc = add_command("add-oidc", main_add_oidc, help="Add or update config entry for OpenID Connect.")
    add_oidc.add_argument("backend", help="OpenEO Backend URL.")
    add_oidc.add_argument("--provider-id", help="Provider ID to use.")
    add_oidc.add_argument("--client-id", help="Client ID to use.")
    add_oidc.add_argument(
        "--no-client-secret",
        dest="ask_client_secret",
        default=True,
        action="store_false",
        help="Don't ask for secret (because client does not need one).",
    )
    add_oidc.add_argument(
        "--use-default-client", action="store_true", help="Use default client (as provided by backend)."
    )

    # Command: oidc-auth
    oidc_auth = add_command(
        "oidc-auth", main_oidc_auth, help="Do OpenID Connect authentication flow and store refresh tokens."
    )
    oidc_auth.add_argument("backend", help="OpenEO Backend URL.")
    oidc_auth.add_argument("--provider-id", help="Provider ID to use.")
    oidc_auth.add_argument(
        "--flow", choices=_OIDC_FLOW_CHOICES, default="device", help="OpenID Connect flow to use (default: device)."
    )
    oidc_auth.add_argument(
        "--timeout", type=int, default=60, help="Timeout in seconds to wait for (user) response."
    )

    # Parse arguments and execute sub-command
    args = cli.parse_args(argv)
    # No `-v`: warnings only, one `-v`: info, more: debug
    log_levels = {0: logging.WARN, 1: logging.INFO}
    logging.basicConfig(level=log_levels.get(args.verbose, logging.DEBUG))
    _log.debug(repr(args))
    if not args.subparser_name:
        cli.print_help()
        return
    args.func(args)
+ """ + + def describe(p: Path): + if p.exists(): + return "perms: 0o{p:o}, size: {s}B".format(p=p.stat().st_mode & 0o777, s=p.stat().st_size) + else: + return "does not exist" + + config_path = AuthConfig().path + print("openEO auth config: {p} ({d})".format(p=str(config_path), d=describe(config_path))) + tokens_path = RefreshTokenStore().path + print("openEO OpenID Connect refresh token store: {p} ({d})".format(p=str(tokens_path), d=describe(tokens_path))) + + +def _redact(d: dict, keys_to_redact: List[str]): + """Redact secrets in given dict in-place.""" + for k, v in d.items(): + if k in keys_to_redact: + d[k] = "" + elif isinstance(v, dict): + _redact(v, keys_to_redact=keys_to_redact) + + +def main_config_dump(args): + """ + Dump auth config file + """ + config = AuthConfig() + print("### {p} ".format(p=str(config.path)).ljust(80, "#")) + data = config.load(empty_on_file_not_found=False) + if not args.show_secrets: + _redact(data, keys_to_redact=["client_secret", "password", "refresh_token"]) + json.dump(data, fp=sys.stdout, indent=2) + print() + + +def main_token_dump(args): + """ + Dump refresh token file + """ + tokens = RefreshTokenStore() + print("### {p} ".format(p=str(tokens.path)).ljust(80, "#")) + data = tokens.load(empty_on_file_not_found=False) + if not args.show_secrets: + _redact(data, keys_to_redact=["client_secret", "password", "refresh_token"]) + json.dump(data, fp=sys.stdout, indent=2) + print() + + +def main_token_clear(args): + """ + Remove refresh token file + """ + tokens = RefreshTokenStore() + path = tokens.path + if path.exists(): + if not args.force: + answer = builtins.input(f"Remove refresh token file {path}? 
'y' or 'n': ") + if answer.lower()[:1] != "y": + print("Keeping refresh token file.") + return + tokens.remove() + print(f"Removed refresh token file {path}.") + else: + print(f"No refresh token file at {path}.") + + +def main_add_basic(args): + """ + Add a config entry for basic auth + """ + backend = args.backend + username = args.username + try_auth = args.try_auth + config = AuthConfig() + + print("Will add basic auth config for backend URL {b!r}".format(b=backend)) + print("to config file: {c!r}".format(c=str(config.path))) + + # Find username and password + if not username: + username = builtins.input("Enter username and press enter: ") + print("Using username {u!r}".format(u=username)) + password = getpass("Enter password and press enter: ") or None + + if try_auth: + print("Trying to authenticate with {b!r}".format(b=backend)) + con = connect(backend) + con.authenticate_basic(username, password) + print("Successfully authenticated {u!r}".format(u=username)) + + config.set_basic_auth(backend=backend, username=username, password=password) + print("Saved credentials to {p!r}".format(p=str(config.path))) + + +def _interactive_choice(title: str, options: List[Tuple[str, str]], attempts=10) -> str: + """ + Let user choose between options (given as dict) and return chosen key + """ + print(title) + for c, (k, v) in enumerate(options): + print("[{c:d}] {v}".format(c=c + 1, v=v)) + for _ in range(attempts): + try: + entered = builtins.input("Choose one (enter index): ") + return options[int(entered) - 1][0] + except Exception: + pass + raise CliToolException("Failed to pick valid option.") + + +def show_warning(message: str): + _log.warning(message) + + +def main_add_oidc(args): + """ + Add a config entry for OIDC auth + """ + backend = args.backend + provider_id = args.provider_id + client_id = args.client_id + ask_client_secret = args.ask_client_secret + use_default_client = args.use_default_client + config = AuthConfig() + + print("Will add OpenID Connect auth 
config for backend URL {b!r}".format(b=backend)) + print("to config file: {c!r}".format(c=str(config.path))) + + con = connect(backend) + con.capabilities().api_version_check.require_at_least("1.0.0") + + # Find provider ID + oidc_info = con.get("/credentials/oidc", expected_status=200).json() + providers = OrderedDict((p["id"], OidcProviderInfo.from_dict(p)) for p in oidc_info["providers"]) + + if not providers: + raise CliToolException("No OpenID Connect providers listed by backend {b!r}.".format(b=backend)) + if not provider_id: + if len(providers) == 1: + provider_id = list(providers.keys())[0] + else: + provider_id = _interactive_choice( + title="Backend {b!r} has multiple OpenID Connect providers.".format(b=backend), + options=[(p.id, "{t} (issuer {s})".format(t=p.title, s=p.issuer)) for p in providers.values()], + ) + if provider_id not in providers: + raise CliToolException( + "Invalid provider ID {p!r}. Should be one of {o}.".format(p=provider_id, o=list(providers.keys())) + ) + provider = providers[provider_id] + print("Using provider ID {p!r} (issuer {i!r})".format(p=provider_id, i=provider.issuer)) + + # Get client_id and client_secret (if necessary) + if use_default_client: + if not provider.default_clients: + show_warning("No default clients declared for provider {p!r}".format(p=provider_id)) + client_id, client_secret = None, None + else: + if not client_id: + if provider.default_clients: + client_prompt = "Enter client_id or leave empty to use default client, and press enter: " + else: + client_prompt = "Enter client_id and press enter: " + client_id = builtins.input(client_prompt).strip() or None + print("Using client ID {u!r}".format(u=client_id)) + if not client_id and not provider.default_clients: + show_warning("Given client ID was empty.") + + if client_id and ask_client_secret: + client_secret = getpass("Enter client_secret or leave empty to not use a secret, and press enter: ") or None + else: + client_secret = None + + 
config.set_oidc_client_config( + backend=backend, + provider_id=provider_id, + client_id=client_id, + client_secret=client_secret, + issuer=provider.issuer, + ) + print("Saved client information to {p!r}".format(p=str(config.path))) + + +_webbrowser_open = None + + +def main_oidc_auth(args): + """ + Do OIDC auth flow and store refresh tokens. + """ + backend = args.backend + oidc_flow = args.flow + provider_id = args.provider_id + timeout = args.timeout + + config = AuthConfig() + + print("Will do OpenID Connect flow to authenticate with backend {b!r}.".format(b=backend)) + print("Using config {c!r}.".format(c=str(config.path))) + + # Determine provider + provider_configs = config.get_oidc_provider_configs(backend=backend) + _log.debug("Provider configs: {c!r}".format(c=provider_configs)) + if not provider_id: + if len(provider_configs) == 0: + print("Will try to use default provider_id.") + provider_id = None + elif len(provider_configs) == 1: + provider_id = list(provider_configs.keys())[0] + else: + provider_id = _interactive_choice( + title="Multiple OpenID Connect providers available for backend {b!r}".format(b=backend), + options=sorted( + (k, "{k}: issuer {s}".format(k=k, s=v.get("issuer", "n/a"))) for k, v in provider_configs.items() + ), + ) + if not (provider_id is None or provider_id in provider_configs): + raise CliToolException( + "Invalid provider ID {p!r}. 
Should be `None` or one of {o}.".format( + p=provider_id, o=list(provider_configs.keys()) + ) + ) + print("Using provider ID {p!r}.".format(p=provider_id)) + + # Get client id and secret + client_id, client_secret = config.get_oidc_client_configs(backend=backend, provider_id=provider_id) + if client_id: + print("Using client ID {c!r}.".format(c=client_id)) + else: + print("Will try to use default client.") + + refresh_token_store = RefreshTokenStore() + con = Connection(backend, refresh_token_store=refresh_token_store) + if oidc_flow == "auth-code": + print("Starting OpenID Connect authorization code flow:") + print( + "a browser window should open allowing you to log in with the identity provider\n" + "and grant access to the client {c!r} (timeout: {t}s).".format(c=client_id, t=timeout) + ) + con.authenticate_oidc_authorization_code( + client_id=client_id, + client_secret=client_secret, + provider_id=provider_id, + timeout=timeout, + store_refresh_token=True, + webbrowser_open=_webbrowser_open, + ) + print("The OpenID Connect authorization code flow was successful.") + elif oidc_flow == "device": + print("Starting OpenID Connect device flow.") + con.authenticate_oidc_device( + client_id=client_id, client_secret=client_secret, provider_id=provider_id, store_refresh_token=True + ) + print("The OpenID Connect device flow was successful.") + else: + raise CliToolException("Invalid flow {f!r}".format(f=oidc_flow)) + + print("Stored refresh token in {p!r}".format(p=str(refresh_token_store.path))) + + +if __name__ == "__main__": + main() diff --git a/lib/openeo/rest/auth/config.py b/lib/openeo/rest/auth/config.py new file mode 100644 index 000000000..8890b88ac --- /dev/null +++ b/lib/openeo/rest/auth/config.py @@ -0,0 +1,240 @@ +""" +Functionality to store and retrieve authentication settings (usernames, passwords, client ids, ...) +from local config files. +""" + +# TODO: also allow to set client_id, client_secret, refresh_token through env variables? 
+ + +import json +import logging +import platform +import stat +from datetime import datetime +from pathlib import Path +from typing import Dict, Tuple, Union + +from openeo import __version__ +from openeo.config import get_user_config_dir, get_user_data_dir +from openeo.util import deep_get, deep_set, rfc3339 + +try: + # Use oschmod when available (fall back to POSIX-only functionality from stdlib otherwise) + # TODO: enforce oschmod as dependency for all platforms? + import oschmod +except ImportError: + oschmod = None + + +PRIVATE_PERMISSIONS = stat.S_IRUSR | stat.S_IWUSR + +log = logging.getLogger(__name__) + + +def get_file_mode(path: Path) -> int: + """Get the file permission bits in a way that works on both *nix and Windows platforms.""" + if oschmod: + return oschmod.get_mode(str(path)) + return path.stat().st_mode + + +def set_file_mode(path: Path, mode: int): + """Set the file permission bits in a way that works on both *nix and Windows platforms.""" + if oschmod: + oschmod.set_mode(str(path), mode=mode) + else: + path.chmod(mode=mode) + + +def assert_private_file(path: Path): + """Check that given file is only readable by user.""" + mode = get_file_mode(path) + if (mode & stat.S_IRWXG) or (mode & stat.S_IRWXO): + message = "File {p} could be readable by others: mode {a:o} (expected: {e:o}).".format( + p=path, a=mode & (stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO), e=PRIVATE_PERMISSIONS + ) + if platform.system() == "Windows": + log.info(message) + else: + raise PermissionError(message) + + +def utcnow_rfc3339() -> str: + """Current datetime formatted as RFC-3339 string.""" + return rfc3339.datetime(datetime.utcnow()) + + +def _normalize_url(url: str) -> str: + """Normalize a url (trim trailing slash), to simplify equality checking.""" + return url.rstrip("/") or "/" + + +class PrivateJsonFile: + """ + Base class for private config/data files in JSON format. 
+ """ + + DEFAULT_FILENAME = "private.json" + + def __init__(self, path: Path = None): + if path is None: + path = self.default_path() + if path.is_dir(): + path = path / self.DEFAULT_FILENAME + self._path = path + + @property + def path(self) -> Path: + return self._path + + @classmethod + def default_path(cls) -> Path: + return get_user_config_dir(auto_create=True) / cls.DEFAULT_FILENAME + + def load(self, empty_on_file_not_found=True) -> dict: + """Load all data from file""" + if not self._path.exists(): + if empty_on_file_not_found: + return {} + raise FileNotFoundError(self._path) + assert_private_file(self._path) + log.debug("Loading private JSON file {p}".format(p=self._path)) + # TODO: add file locking to avoid race conditions? + try: + with self._path.open("r", encoding="utf8") as f: + return json.load(f) + except Exception as e: + raise RuntimeError(f"Failed to load {type(self).__name__} from {self._path!r}: {e!r}") from e + + def _write(self, data: dict): + """Write whole data to file.""" + log.debug("Writing private JSON file {p}".format(p=self._path)) + # TODO: add file locking to avoid race conditions? 
+ with self._path.open("w", encoding="utf8") as f: + json.dump(data, f, indent=2) + set_file_mode(self._path, mode=PRIVATE_PERMISSIONS) + assert_private_file(self._path) + + def get(self, *keys, default=None) -> Union[dict, str, int]: + """Load JSON file and do deep get with given keys.""" + result = deep_get(self.load(), *keys, default=default) + if isinstance(result, Exception) or (isinstance(result, type) and issubclass(result, Exception)): + # pylint: disable=raising-bad-type + raise result + return result + + def set(self, *keys, value): + data = self.load() + deep_set(data, *keys, value=value) + self._write(data) + + def remove(self): + if self._path.exists(): + log.debug(f"Removing {self._path}") + self._path.unlink() + + +class AuthConfig(PrivateJsonFile): + DEFAULT_FILENAME = "auth-config.json" + + @classmethod + def default_path(cls) -> Path: + return get_user_config_dir(auto_create=True) / cls.DEFAULT_FILENAME + + def _write(self, data: dict): + # When starting fresh: add some metadata and defaults + if "metadata" not in data: + data["metadata"] = { + "type": "AuthConfig", + "created": utcnow_rfc3339(), + "created_by": "openeo-python-client {v}".format(v=__version__), + "version": 1, + } + data.setdefault("general", {}) + data.setdefault("backends", {}) + return super()._write(data=data) + + def get_basic_auth(self, backend: str) -> Tuple[Union[None, str], Union[None, str]]: + """Get username/password combo for given backend. Values will be None when no config is available.""" + basic = self.get("backends", _normalize_url(backend), "basic", default={}) + username = basic.get("username") + password = basic.get("password") if username else None + return username, password + + def set_basic_auth(self, backend: str, username: str, password: Union[str, None]): + data = self.load() + keys = ( + "backends", + _normalize_url(backend), + "basic", + ) + # TODO: support multiple basic auth credentials? 
(pick latest by default for example) + deep_set(data, *keys, "date", value=utcnow_rfc3339()) + deep_set(data, *keys, "username", value=username) + if password: + deep_set(data, *keys, "password", value=password) + self._write(data) + + def get_oidc_provider_configs(self, backend: str) -> Dict[str, dict]: + """ + Get provider config items for given backend. + + Returns a dict mapping provider_id to dicts with "client_id" and "client_secret" items + """ + return self.get("backends", _normalize_url(backend), "oidc", "providers", default={}) + + def get_oidc_client_configs(self, backend: str, provider_id: str) -> Tuple[str, str]: + """ + Get client_id and client_secret for given backend+provider_id. Values will be None when no config is available. + """ + client = self.get("backends", _normalize_url(backend), "oidc", "providers", provider_id, default={}) + client_id = client.get("client_id") + client_secret = client.get("client_secret") if client_id else None + return client_id, client_secret + + def set_oidc_client_config( + self, + backend: str, + provider_id: str, + client_id: Union[str, None], + client_secret: Union[str, None] = None, + issuer: Union[str, None] = None, + ): + data = self.load() + keys = ("backends", _normalize_url(backend), "oidc", "providers", provider_id) + # TODO: support multiple clients? (pick latest by default for example) + deep_set(data, *keys, "date", value=utcnow_rfc3339()) + deep_set(data, *keys, "client_id", value=client_id) + deep_set(data, *keys, "client_secret", value=client_secret) + if issuer: + deep_set(data, *keys, "issuer", value=issuer) + self._write(data) + + +class RefreshTokenStore(PrivateJsonFile): + """ + Basic JSON-file based storage of refresh tokens. 
class QueuingRequestHandler(http.server.BaseHTTPRequestHandler):
    """
    Base class for simple HTTP request handlers to be used in threaded context.
    The handler puts the requested paths in a thread-safe queue
    """

    def __init__(self, *args, **kwargs):
        # Take the `queue` keyword out of kwargs before delegating to the base class constructor,
        # which does not know about it. Fall back to a private queue when none is given.
        self._queue = kwargs.pop("queue", None) or Queue()
        super().__init__(*args, **kwargs)

    def do_GET(self):
        """Handle a GET request: pass the request path to :py:meth:`queue` and send back its response."""
        log.debug("{c} GET {p}".format(c=self.__class__.__name__, p=self.path))
        status, body, headers = self.queue(self.path)
        # Encode first: Content-Length has to be the number of bytes that is sent,
        # which differs from the number of characters as soon as the body is not pure ASCII.
        payload = body.encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Length", str(len(payload)))
        for k, v in headers.items():
            self.send_header(k, v)
        self.end_headers()
        self.wfile.write(payload)

    def queue(self, path: str):
        """
        Put the requested path on the queue.

        :param path: path (including query string) of the request
        :return: tuple ``(status_code, body, headers)`` to respond with
        """
        self._queue.put(path)
        return 200, "queued", {}

    @classmethod
    def with_queue(cls, queue: Queue):
        """Create a factory for this object pre-bound with given queue object"""
        return functools.partial(cls, queue=queue)

    def log_message(self, format, *args):
        # Override the default implementation, which is a hardcoded `sys.stderr.write`
        log.debug(format % args)

openEO Python client {version}

+ + + """ + + def queue(self, path: str): + if path.startswith(self.PATH + "?"): + super().queue(path) + # TODO: auto-close browser tab/window? + # TODO: make it a nicer page and bit more of metadata? + status = 200 + content = "

OIDC Redirect URL request received.

You can close this browser tab now.

" + else: + status = 404 + content = "

Not found.

class HttpServerThread(threading.Thread):
    """
    Thread that runs a HTTP server (`http.server.HTTPServer`)

    Can be used as context manager: the server is started on enter,
    and shut down (and the thread joined) on exit.
    """

    def __init__(self, RequestHandlerClass, server_address: Tuple[str, int] = None):
        """
        :param RequestHandlerClass: request handler class (or factory) to pass to `http.server.HTTPServer`
        :param server_address: ``(host, port)`` tuple to bind to
            (default: listen on all ips and let the OS pick a free port)
        """
        # Make it a daemon to minimize potential shutdown issues due to `serve_forever`
        super().__init__(daemon=True)
        self._RequestHandlerClass = RequestHandlerClass
        # Server address ('', 0): listen on all ips and let OS pick a free port
        self._server_address = server_address or ("", 0)
        self._server = None

    def start(self):
        """Create the HTTP server (which binds the socket) and start the thread that serves it."""
        self._server = http.server.HTTPServer(self._server_address, self._RequestHandlerClass)
        self._log_status("start thread")
        super().start()

    def run(self):
        self._log_status("start serving")
        self._server.serve_forever()
        self._log_status("stop serving")

    def shutdown(self):
        """Stop serving requests and release the listening socket."""
        self._log_status("shut down thread")
        # `shutdown()` blocks until the `serve_forever` loop has ended ...
        self._server.shutdown()
        # ... so it is safe to close the socket now. Without this, the socket stays open
        # until the server object happens to be garbage collected.
        self._server.server_close()

    def server_address_info(self) -> Tuple[int, str, str]:
        """
        Get server address info: (port, host_address, fully_qualified_domain_name)

        :raises RuntimeError: if the server is not created yet (thread not started)
        """
        if self._server is None:
            raise RuntimeError("Server is not set up yet")
        return self._server.server_port, self._server.server_address[0], self._server.server_name

    def _log_status(self, message):
        port, host, fqdn = self.server_address_info()
        log.info("{c}: {m} (at {h}:{p}, {f})".format(c=self.__class__.__name__, m=message, h=host, p=port, f=fqdn))

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.shutdown()
        self.join()
        self._log_status("thread joined")
def jwt_decode(token: str) -> Tuple[dict, dict]:
    """
    Poor man's JWT decoding: decode header and payload, *without* verifying the signature.
    TODO: use a real library that also handles verification properly?

    :param token: JWT in compact serialization (three dot-separated parts: header, payload, signature)
    :return: tuple ``(header, payload)`` of the decoded JSON objects
    :raises ValueError: if the token does not consist of three dot-separated parts,
        or if header or payload is not valid base64url encoded UTF-8 JSON
    """

    def _decode(data: str) -> dict:
        # JWT parts are base64url encoded ("-" and "_" instead of "+" and "/") with the padding
        # stripped: restore the padding up to a multiple of 4 and decode with the URL-safe alphabet.
        padded = data + "=" * (-len(data) % 4)
        # JSON text is UTF-8 encoded (claims can contain non-ASCII characters, e.g. in names).
        decoded = base64.urlsafe_b64decode(padded).decode("utf-8")
        return json.loads(decoded)

    header, payload, _signature = token.split(".")
    return _decode(header), _decode(payload)
+ self.id = provider_id + self.title = title + if discovery_url: + self.discovery_url = discovery_url + elif issuer: + self.discovery_url = url_join(issuer, "/.well-known/openid-configuration") + else: + raise ValueError("At least `issuer` or `discovery_url` should be specified") + if not requests_session: + requests_session = requests.Session() + discovery_resp = requests_session.get(self.discovery_url, timeout=20) + discovery_resp.raise_for_status() + self.config = discovery_resp.json() + self.issuer = issuer or self.config["issuer"] + # Minimal set of scopes to request + self._supported_scopes = self.config.get("scopes_supported", ["openid"]) + self._scopes = {"openid"}.union(scopes or []).intersection(self._supported_scopes) + log.debug(f"Scopes: provider supported {self._supported_scopes} & backend desired {scopes} -> {self._scopes}") + self.default_clients = default_clients + + @classmethod + def from_dict(cls, data: dict) -> OidcProviderInfo: + return cls( + provider_id=data["id"], + title=data["title"], + issuer=data["issuer"], + scopes=data.get("scopes"), + default_clients=data.get("default_clients"), + ) + + def get_scopes_string(self, request_refresh_token: bool = False) -> str: + """ + Build "scope" string for authentication request. 
+ + :param request_refresh_token: include "offline_access" scope (if supported), + which some OIDC providers require in order to return refresh token + :return: space separated scope listing as single string + """ + scopes = self._scopes + if request_refresh_token and "offline_access" in self._supported_scopes: + scopes = scopes | {"offline_access"} + log.debug("Using scopes: {s}".format(s=scopes)) + return " ".join(sorted(scopes)) + + def get_default_client_id(self, grant_check: GrantsChecker) -> Union[str, None]: + """ + Get first default client that supports (as stated by provider's `grant_types`) + the desired grant types (as implemented by `grant_check`) + """ + if isinstance(grant_check, list): + # Simple `grant_check` mode: just provide list of grants that all must be supported. + desired_grants = grant_check + grant_check = lambda grants: all(g in grants for g in desired_grants) + + def normalize_grants(grants: List[str]): + for grant in grants: + try: + yield DefaultOidcClientGrant(grant) + except ValueError: + log.warning(f"Invalid OIDC grant type {grant!r}.") + + for client in self.default_clients or []: + client_id = client.get("id") + supported_grants = client.get("grant_types") + supported_grants = list(normalize_grants(supported_grants)) + if client_id and supported_grants and grant_check(supported_grants): + return client_id + + +class OidcClientInfo: + """ + Simple container holding basic info of an OIDC client + """ + + __slots__ = ["client_id", "provider", "client_secret"] + + def __init__(self, client_id: str, provider: OidcProviderInfo, client_secret: Optional[str] = None): + self.client_id = client_id + self.provider = provider + self.client_secret = client_secret + # TODO: also info client type (desktop app, web app, SPA, ...)? 
+ + # TODO: load from config file + + def guess_device_flow_pkce_support(self): + """Best effort guess if PKCE should be used for device auth grant""" + # Check if this client is also defined as default client with device_code+pkce + default_clients = [c for c in self.provider.default_clients or [] if c["id"] == self.client_id] + grant_types = set(g for c in default_clients for g in c.get("grant_types", [])) + return any("device_code+pkce" in g for g in grant_types) + + +class OidcAuthenticator: + """ + Base class for OpenID Connect authentication flows. + """ + + grant_type = NotImplemented + + def __init__( + self, + client_info: OidcClientInfo, + requests_session: Optional[requests.Session] = None, + ): + self._client_info = client_info + self._provider_config = client_info.provider.config + # TODO: check provider config (e.g. if grant type is supported) + self._requests = requests_session or requests.Session() + + @property + def client_info(self) -> OidcClientInfo: + return self._client_info + + @property + def client_id(self) -> str: + return self._client_info.client_id + + @property + def client_secret(self) -> str: + return self._client_info.client_secret + + @property + def provider_info(self) -> OidcProviderInfo: + return self._client_info.provider + + def get_tokens(self, request_refresh_token: bool = False) -> AccessTokenResult: + """Get access_token and possibly id_token+refresh_token.""" + result = self._do_token_post_request(post_data=self._get_token_endpoint_post_data()) + return self._get_access_token_result(result) + + def _get_token_endpoint_post_data(self) -> dict: + """Build POST data dict to send to token endpoint""" + return { + "grant_type": self.grant_type, + "client_id": self.client_id, + } + + def _do_token_post_request(self, post_data: dict) -> dict: + """Do POST to token endpoint to get access token""" + token_endpoint = self._provider_config["token_endpoint"] + log.info( + "Doing {g!r} token request {u!r} with post data fields {p!r} 
(client_id {c!r})".format( + g=self.grant_type, c=self.client_id, u=token_endpoint, p=list(post_data.keys()) + ) + ) + try: + resp = self._requests.post(url=token_endpoint, data=post_data) + except requests.exceptions.RequestException as e: + raise OidcException(f"Failed to retrieve access token at {token_endpoint!r}: {e!r}") from e + if resp.status_code != 200: + # TODO: are other status_code values valid too? + raise OidcException( + "Failed to retrieve access token at {u!r}: {s} {r!r} {t!r}".format( + s=resp.status_code, r=resp.reason, u=resp.url, t=resp.text + ) + ) + + result = resp.json() + return result + + def _get_access_token_result(self, data: dict, expected_nonce: str = None) -> AccessTokenResult: + """Parse JSON result from token request""" + redacted = { + k: v if k in ["expires_in", "refresh_expires_in", "token_type", "scope"] else "" + for k, v in data.items() + } + log.debug(f"Extracting access token result from token response {redacted}") + return AccessTokenResult( + access_token=self._extract_token(data, "access_token"), + id_token=self._extract_token(data, "id_token", expected_nonce=expected_nonce, allow_absent=True), + refresh_token=self._extract_token(data, "refresh_token", allow_absent=True), + ) + + @staticmethod + def _extract_token(data: dict, key: str, expected_nonce: str = None, allow_absent=False) -> Union[str, None]: + """ + Extract token of given type ("access_token", "id_token", "refresh_token") from a token JSON response + """ + try: + token = data[key] + except KeyError: + if allow_absent: + return + raise OidcException("No {k!r} in response".format(k=key)) + if expected_nonce: + # TODO: verify the JWT properly? + _, payload = jwt_decode(token) + if payload["nonce"] != expected_nonce: + raise OidcException("Invalid nonce in {k}".format(k=key)) + return token + + +class PkceCode: + """ + Simple container for PKCE code verifier and code challenge. + + PKCE, pronounced "pixy", is short for "Proof Key for Code Exchange". 
+ Also see https://tools.ietf.org/html/rfc7636 + """ + + __slots__ = ["code_verifier", "code_challenge", "code_challenge_method"] + + def __init__(self): + self.code_verifier = random_string(64) + # Only SHA256 is supported for now. + self.code_challenge_method = "S256" + self.code_challenge = PkceCode.sha256_hash(self.code_verifier) + + @staticmethod + def sha256_hash(code: str) -> str: + """Apply SHA256 hash to code verifier to get code challenge""" + data = hashlib.sha256(code.encode("ascii")).digest() + return base64.urlsafe_b64encode(data).decode("ascii").replace("=", "") + + +class AuthCodeResult(NamedTuple): + auth_code: str + nonce: str + code_verifier: str + redirect_uri: str + + +class OidcAuthCodePkceAuthenticator(OidcAuthenticator): + """ + Implementation of OpenID Connect authentication using OAuth Authorization Code Flow with PKCE. + + This flow is to be used for interactive use cases (e.g. user is working in a Jupyter/IPython notebook). + + It goes roughly like this: + - A short living HTTP server is started in a side-thread to serve the redirect URI + that is required in this flow. + - A browser window/tab is opened showing the (third party) Identity Provider authorization endpoint + - (if not already:) User authenticates with the Identity Provider (e.g. with username and password) + - Identity Provider forwards to the redirect URI (which is served locally by the side-thread), + sending an authorization code (among others) along + - The request handler in the side thread captures the redirect and passes it to the main thread (through a queue) + - The main extracts the necessary information from the redirect request (like the authorization code) + and shuts down the side thread + - The authorization code is exchanged for an access code and id token + - The access code can be used as bearer token for subsequent API calls + + .. deprecated:: 0.19.0 + Usage of the Authorization Code flow is deprecated (because of its complexity) and will be removed. 
def __init__(
    self,
    client_info: OidcClientInfo,
    webbrowser_open: Callable = None,
    timeout: int = None,
    server_address: Tuple[str, int] = None,
    requests_session: Optional[requests.Session] = None,
):
    """
    Set up the authorization code (with PKCE) authenticator.

    :param client_info: OIDC client info, passed on to the base authenticator
    :param webbrowser_open: callable used to open the authentication URL
        (falls back to ``webbrowser.open`` when not given)
    :param timeout: seconds to wait for the OAuth redirect request
        (falls back to ``TIMEOUT_DEFAULT`` when falsy)
    :param server_address: address handed to the temporary HTTP server that catches the redirect
    :param requests_session: optional ``requests`` session, passed on to the base authenticator
    """
    super().__init__(client_info=client_info, requests_session=requests_session)
    self._server_address = server_address
    self._authentication_timeout = timeout if timeout else self.TIMEOUT_DEFAULT
    self._webbrowser_open = webbrowser_open if webbrowser_open else webbrowser.open


def _get_auth_code(self, request_refresh_token: bool = False) -> AuthCodeResult:
    """
    Do the OAuth authentication request and catch the redirect to extract the authentication code.

    A temporary HTTP server is started, the authentication URL is opened through the
    configured ``webbrowser_open`` callable and the redirect request is awaited.

    :param request_refresh_token: passed to the provider's scope string builder
    :return: the auth code, together with the nonce, PKCE code verifier and redirect URI that were used
    :raises OidcException: on timeout, when not exactly one redirect request was received,
        on state mismatch or when the redirect carries no auth code
    """
    state = random_string(32)
    nonce = random_string(21)
    pkce = PkceCode()

    def _report_waiting(**kwargs):
        log.info("No result yet (elapsed: {e:.2f}s)".format(e=kwargs.get("elapsed", 0)))

    # Set up HTTP server (in separate thread) to catch OAuth redirect URL
    redirect_queue = Queue()
    handler_class = OAuthRedirectRequestHandler.with_queue(redirect_queue)
    server_thread = HttpServerThread(RequestHandlerClass=handler_class, server_address=self._server_address)
    with server_thread:
        port, _host, _fqdn = server_thread.server_address_info()
        # TODO: use fully qualified domain name instead of "localhost"?
        #      Otherwise things won't work when the client is for example
        #      running in a remotely hosted Jupyter setup.
        #      Maybe even FQDN will not resolve properly in the user's browser
        #      and we need additional means to get a working hostname?
        redirect_uri = f"http://localhost:{port}" + OAuthRedirectRequestHandler.PATH
        log.info(f"Using OAuth redirect URI {redirect_uri!r}")

        # Build authentication URL
        endpoint = self._provider_config["authorization_endpoint"]
        query = urllib.parse.urlencode(
            {
                "response_type": "code",
                "client_id": self.client_id,
                "scope": self._client_info.provider.get_scopes_string(
                    request_refresh_token=request_refresh_token
                ),
                "redirect_uri": redirect_uri,
                "state": state,
                "nonce": nonce,
                "code_challenge": pkce.code_challenge,
                "code_challenge_method": pkce.code_challenge_method,
            }
        )
        auth_url = f"{endpoint}?{query}"
        log.info(f"Sending user to auth URL {auth_url!r}")
        # Open browser window/tab with authentication URL
        self._webbrowser_open(auth_url)

        # TODO: show some feedback here that we are waiting browser based interaction here?

        try:
            # Collect data from redirect uri
            log.info(f"Waiting for request to redirect URI (timeout {self._authentication_timeout}s)")
            # TODO: When authentication fails (e.g. identity provider is down), this might hang the client
            #       (e.g. jupyter notebook). Is there a way to abort this? use signals? handle "abort" request?
            callbacks = list(
                drain_queue(
                    redirect_queue,
                    initial_timeout=self._authentication_timeout,
                    on_empty=_report_waiting,
                )
            )
        except TimeoutError:
            raise OidcException(f"Timeout: no request to redirect URI after {self._authentication_timeout}s")

        if len(callbacks) != 1:
            raise OidcException(f"Expected 1 OAuth redirect request, but got: {len(callbacks)}")

        # Parse OAuth redirect URL
        (redirect_request,) = callbacks
        log.debug(f"Parsing redirect request {redirect_request}")
        redirect_params = urllib.parse.parse_qs(urllib.parse.urlparse(redirect_request).query)
        log.debug(f"Parsed redirect request: {redirect_params}")
        # A missing "state" yields None here, which also fails the comparison.
        if redirect_params.get("state") != [state]:
            raise OidcException("Invalid state")
        if "code" not in redirect_params:
            raise OidcException("No auth code in redirect")
        auth_code = redirect_params["code"][0]

    return AuthCodeResult(
        auth_code=auth_code, nonce=nonce, code_verifier=pkce.code_verifier, redirect_uri=redirect_uri
    )


def get_tokens(self, request_refresh_token: bool = False) -> AccessTokenResult:
    """
    Do OpenID authentication flow with PKCE:
    get auth code and exchange it for access and id token.

    :param request_refresh_token: passed on to the auth code request
    :return: result of ``_get_access_token_result``, checked against the nonce of the auth code request
    """
    # Get auth code from authentication provider
    auth = self._get_auth_code(request_refresh_token=request_refresh_token)

    # Exchange authentication code for access token
    post_data = dict_no_none(
        grant_type=self.grant_type,
        client_id=self.client_id,
        client_secret=self.client_secret,
        redirect_uri=auth.redirect_uri,
        code=auth.auth_code,
        code_verifier=auth.code_verifier,
    )
    token_response = self._do_token_post_request(post_data=post_data)

    return self._get_access_token_result(token_response, expected_nonce=auth.nonce)
class OidcClientCredentialsAuthenticator(OidcAuthenticator):
    """
    Implementation of "Client Credentials" Flow.
    """

    grant_type = "client_credentials"

    def _get_token_endpoint_post_data(self) -> dict:
        """Extend the base token request data with client secret and scope."""
        post_data = super()._get_token_endpoint_post_data()
        post_data.update(
            client_secret=self.client_secret,
            scope=self._client_info.provider.get_scopes_string(),
        )
        return post_data


class OidcResourceOwnerPasswordAuthenticator(OidcAuthenticator):
    """
    Implementation of "Resource Owner Password Credentials" (ROPC) grant type.

    Note: This flow should only be used when end user owns (or highly trusts) the client code
    and the password can be handled/stored/retrieved in a secure manner.
    """

    grant_type = "password"

    def __init__(
        self,
        client_info: OidcClientInfo,
        username: str,
        password: str,
        requests_session: Optional[requests.Session] = None,
    ):
        """
        :param client_info: OIDC client info, passed on to the base authenticator
        :param username: user name to send with the token request
        :param password: password to send with the token request
        :param requests_session: optional ``requests`` session, passed on to the base authenticator
        """
        super().__init__(client_info=client_info, requests_session=requests_session)
        self._username, self._password = username, password

    def _get_token_endpoint_post_data(self) -> dict:
        """Extend the base token request data with client secret, scope and user credentials."""
        post_data = super()._get_token_endpoint_post_data()
        post_data.update(
            client_secret=self.client_secret,
            scope=self._client_info.provider.get_scopes_string(),
            username=self._username,
            password=self._password,
        )
        return post_data


class OidcRefreshTokenAuthenticator(OidcAuthenticator):
    """
    Implementation of obtaining a new OpenID Connect access token through a refresh token.
    """

    grant_type = "refresh_token"

    def __init__(
        self,
        client_info: OidcClientInfo,
        refresh_token: str,
        requests_session: Optional[requests.Session] = None,
    ):
        """
        :param client_info: OIDC client info, passed on to the base authenticator
        :param refresh_token: refresh token to send with the token request
        :param requests_session: optional ``requests`` session, passed on to the base authenticator
        """
        super().__init__(client_info=client_info, requests_session=requests_session)
        self._refresh_token = refresh_token

    def _get_token_endpoint_post_data(self) -> dict:
        """Extend the base token request data with the refresh token (and client secret, if any)."""
        post_data = super()._get_token_endpoint_post_data()
        secret = self.client_secret
        # Only a truthy client secret is included in the request.
        if secret:
            post_data["client_secret"] = secret
        post_data["refresh_token"] = self._refresh_token
        return post_data
class VerificationInfo(NamedTuple):
    """Fields of a device authorization response, as used by the device code flow."""

    verification_uri: str
    # Optional: None when the provider did not supply it.
    verification_uri_complete: Optional[str]
    device_code: str
    user_code: str
    interval: int


def _like_print(display: Callable) -> Callable:
    """
    Ensure that display function supports an `end` argument like `print`.

    :param display: callable used to show text
    :return: ``display`` itself when it is ``print`` or declares an ``end`` parameter,
        otherwise a wrapper that accepts (and drops) ``end`` before calling ``display``
    """
    if display is print:
        return display
    try:
        supports_end = "end" in inspect.signature(display).parameters
    except (TypeError, ValueError):
        # No introspectable signature (e.g. some builtin/extension callables):
        # we can not tell whether `end` is accepted, so take the safe route and drop it.
        supports_end = False
    if supports_end:
        return display
    return lambda *args, end="\n", **kwargs: display(*args, **kwargs)


class _BasicDeviceCodePollUi:
    """
    Basic (print + carriage return) implementation of the device code
    polling loop UI (e.g. show progress bar and status).
    """

    def __init__(
        self,
        timeout: float,
        elapsed: Callable[[], float],
        max_width: int = 80,
        display: Callable = print,
    ):
        """
        :param timeout: total polling time, used to scale the progress bar
        :param elapsed: callable returning the elapsed time so far
        :param max_width: status lines are cut off and padded to this width
        :param display: callable to show text (wrapped to accept an ``end`` argument like ``print``)
        """
        self.timeout = timeout
        self.elapsed = elapsed
        self._max_width = max_width
        self._status = "Authorization pending"
        self._display = _like_print(display)
        self._progress_bar = SimpleProgressBar(width=(max_width - 1) // 2)

    def _instructions(self, info: VerificationInfo) -> str:
        """Build the instruction text, preferring the "complete" verification URI when available."""
        if info.verification_uri_complete:
            return f"Visit {info.verification_uri_complete} to authenticate."
        else:
            return f"Visit {info.verification_uri} and enter user code {info.user_code!r} to authenticate."

    def show_instructions(self, info: VerificationInfo) -> None:
        """Display the authentication instructions."""
        self._display(self._instructions(info=info))

    def set_status(self, status: str):
        """Store the status text shown by subsequent :meth:`show_progress` calls."""
        self._status = status

    def show_progress(self, status: Optional[str] = None, include_bar: bool = True):
        """
        Display the (optionally updated) status, optionally prefixed with a progress bar.

        The line is cut off/padded to ``max_width`` and ended with a carriage return,
        so that the next call overwrites it.
        """
        if status:
            self.set_status(status)
        text = self._status
        if include_bar:
            # Progress bar shows the remaining fraction of the timeout.
            progress_bar = self._progress_bar.get(fraction=1.0 - self.elapsed() / self.timeout)
            text = f"{progress_bar} {text}"
        self._display(f"{text[:self._max_width]: <{self._max_width}s}", end="\r")

    def close(self):
        """Finish the status line with a newline."""
        self._display("", end="\n")


class _JupyterDeviceCodePollUi(_BasicDeviceCodePollUi):
    """Device code polling UI that updates ``IPython.display`` handles instead of printing."""

    def __init__(
        self,
        timeout: float,
        elapsed: Callable[[], float],
        max_width: int = 80,
    ):
        super().__init__(timeout=timeout, elapsed=elapsed, max_width=max_width)
        # Local import: IPython is only needed for this UI.
        import IPython.display

        # Placeholders that are updated in place later on.
        self._instructions_display = IPython.display.display({"text/html": " "}, raw=True, display_id=True)
        self._progress_display = IPython.display.display({"text/html": " "}, raw=True, display_id=True)

    def _instructions(self, info: VerificationInfo) -> str:
        # NOTE(review): the result is displayed as "text/html", but the strings here
        # contain no markup -- confirm against the upstream source.
        url = info.verification_uri_complete if info.verification_uri_complete else info.verification_uri
        instructions = (
            f'Visit {url}'
        )
        instructions += f' 📋'
        if not info.verification_uri_complete:
            instructions += f" and enter user code {info.user_code!r}"
        instructions += " to authenticate."
        return instructions

    def show_instructions(self, info: VerificationInfo) -> None:
        """Update the instructions display handle."""
        self._instructions_display.update({"text/html": self._instructions(info=info)}, raw=True)

    def show_progress(self, status: Optional[str] = None, include_bar: bool = True):
        """Update the progress display handle with status icon, status and (optionally) progress bar."""
        if status:
            self.set_status(status)
        icon = self._status_icon(self._status)
        text = f"{icon} {self._status}"
        if include_bar:
            progress_bar = self._progress_bar.get(fraction=1.0 - self.elapsed() / self.timeout)
            text = f"{progress_bar} {text}"
        self._progress_display.update({"text/html": text}, raw=True)

    def _status_icon(self, status: str) -> str:
        """Pick an icon based on keywords in the (case-insensitive) status text."""
        status = status.lower()
        if "polling" in status or "pending" in status:
            return "\u231B"  # Hourglass
        elif "success" in status:
            return "\u2705"  # Green check mark
        elif "timed out" in status:
            return "\u274C"  # Red cross mark
        else:
            return ""

    def close(self):
        """Nothing to finish here."""
        pass
class OidcDeviceCodePollTimeout(OidcException):
    """Raised when polling the token endpoint in the device code flow exceeds the maximum poll time."""


class OidcDeviceAuthenticator(OidcAuthenticator):
    """
    Implementation of OAuth Device Authorization grant/flow
    """

    grant_type = "urn:ietf:params:oauth:grant-type:device_code"

    DEFAULT_MAX_POLL_TIME = 5 * 60

    def __init__(
        self,
        client_info: OidcClientInfo,
        display: Callable[[str], None] = print,
        device_code_url: Optional[str] = None,
        max_poll_time: float = DEFAULT_MAX_POLL_TIME,
        use_pkce: Optional[bool] = None,
        requests_session: Optional[requests.Session] = None,
    ):
        """
        :param client_info: OIDC client info, passed on to the base authenticator
        :param display: callable to show instructions/progress (used outside of a Jupyter context)
        :param device_code_url: device code URL to use instead of the one from the provider config
        :param max_poll_time: maximum time to keep polling the token endpoint
        :param use_pkce: whether to use PKCE. When ``None``, PKCE is used if the client has no secret
            and ``client_info.guess_device_flow_pkce_support()`` is truthy.
        :param requests_session: optional ``requests`` session, passed on to the base authenticator
        :raises OidcException: when no device code URL is given or found in the provider config
        """
        super().__init__(client_info=client_info, requests_session=requests_session)
        self._display = display
        # Allow to specify/override device code URL for cases when it is not available in OIDC discovery doc.
        self._device_code_url = device_code_url or self._provider_config.get("device_authorization_endpoint")
        if not self._device_code_url:
            raise OidcException("No support for device authorization grant")
        self._max_poll_time = max_poll_time
        if use_pkce is None:
            use_pkce = client_info.client_secret is None and client_info.guess_device_flow_pkce_support()
        self._pkce = PkceCode() if use_pkce else None

    def _get_verification_info(self, request_refresh_token: bool = False) -> VerificationInfo:
        """
        Get verification URL and user code

        :param request_refresh_token: passed to the provider's scope string builder
        :raises OidcException: on a non-200 response or when the response can not be parsed
        """
        post_data = {
            "client_id": self.client_id,
            "scope": self._client_info.provider.get_scopes_string(request_refresh_token=request_refresh_token),
        }
        if self._pkce:
            # Plain string values (not a 1-tuple), consistent with the other form fields.
            post_data["code_challenge"] = self._pkce.code_challenge
            post_data["code_challenge_method"] = self._pkce.code_challenge_method
        resp = self._requests.post(url=self._device_code_url, data=post_data)
        if resp.status_code != 200:
            raise OidcException(
                "Failed to get verification URL and user code from {u!r}: {s} {r!r} {t!r}".format(
                    s=resp.status_code, r=resp.reason, u=resp.url, t=resp.text
                )
            )
        try:
            data = resp.json()
            verification_info = VerificationInfo(
                # Google OAuth/OIDC implementation uses non standard "verification_url" instead of "verification_uri"
                verification_uri=data["verification_uri"] if "verification_uri" in data else data["verification_url"],
                # verification_uri_complete is optional, will be None if this key is not present
                verification_uri_complete=data.get("verification_uri_complete"),
                device_code=data["device_code"],
                user_code=data["user_code"],
                interval=data.get("interval", 5),
            )
        except Exception as e:
            # Keep the original failure attached as explicit cause.
            raise OidcException("Failed to parse device authorization request: {e!r}".format(e=e)) from e
        log.debug("Verification info: %r", verification_info)
        return verification_info

    def get_tokens(self, request_refresh_token: bool = False) -> AccessTokenResult:
        """
        Do the device code flow: show the verification instructions
        and poll the token endpoint until tokens are obtained.

        :param request_refresh_token: passed on to the verification info request
        :raises OidcException: when the token endpoint responds with an error
            other than "authorization_pending" or "slow_down"
        :raises OidcDeviceCodePollTimeout: when no tokens were obtained within the maximum poll time
        """
        # Get verification url and user code
        verification_info = self._get_verification_info(request_refresh_token=request_refresh_token)

        # Poll token endpoint
        token_endpoint = self._provider_config["token_endpoint"]
        post_data = {
            "client_id": self.client_id,
            "device_code": verification_info.device_code,
            "grant_type": self.grant_type,
        }
        if self._pkce:
            post_data["code_verifier"] = self._pkce.code_verifier
        else:
            post_data["client_secret"] = self.client_secret

        poll_interval = verification_info.interval
        log.debug("Start polling token endpoint (interval {i}s)".format(i=poll_interval))

        elapsed = create_timer()
        next_poll = elapsed() + poll_interval
        # TODO: let poll UI determine sleep interval?
        sleep = clip(self._max_poll_time / 100, min=1, max=5)

        if in_jupyter_context():
            poll_ui = _JupyterDeviceCodePollUi(timeout=self._max_poll_time, elapsed=elapsed)
        else:
            poll_ui = _BasicDeviceCodePollUi(timeout=self._max_poll_time, elapsed=elapsed, display=self._display)
        poll_ui.show_instructions(info=verification_info)

        with contextlib.closing(poll_ui):
            while elapsed() <= self._max_poll_time:
                # Refresh the progress display on every sleep cycle, the actual polling is less frequent.
                poll_ui.show_progress()
                time.sleep(sleep)

                if elapsed() >= next_poll:
                    log.debug(
                        f"Doing {self.grant_type!r} token request {token_endpoint!r} with post data fields {list(post_data.keys())!r} (client_id {self.client_id!r})"
                    )
                    poll_ui.show_progress(status="Polling")
                    # TODO: skip occasional failing requests? (e.g. see `SkipIntermittentFailures` from openeo-aggregator)
                    resp = self._requests.post(url=token_endpoint, data=post_data, timeout=5)
                    if resp.status_code == 200:
                        log.info(f"[{elapsed():5.1f}s] Authorized successfully.")
                        poll_ui.show_progress(status="Authorized successfully", include_bar=False)
                        return self._get_access_token_result(data=resp.json())
                    else:
                        try:
                            error = resp.json()["error"]
                        except Exception:
                            error = "unknown"
                        log.info(f"[{elapsed():5.1f}s] not authorized yet: {error}")
                        if error == "authorization_pending":
                            poll_ui.show_progress(status="Authorization pending")
                        elif error == "slow_down":
                            poll_ui.show_progress(status="Slowing down")
                            poll_interval += 5
                        else:
                            # TODO: skip occasional glitches (e.g. see `SkipIntermittentFailures` from openeo-aggregator)
                            raise OidcException(
                                f"Failed to retrieve access token at {token_endpoint!r}: {resp.status_code} {resp.reason!r} {resp.text!r}"
                            )
                    next_poll = elapsed() + poll_interval

            poll_ui.show_progress(status="Timed out", include_bar=False)
            raise OidcDeviceCodePollTimeout(f"Timeout ({self._max_poll_time:.1f}s) while polling for access token.")
# Marker value for "this field is expected to be absent".
ABSENT = object()


class OidcMock:
    """
    Fixture/mock to act as stand-in OIDC provider to test OIDC flows
    """

    def __init__(
        self,
        requests_mock: requests_mock.Mocker,
        *,
        expected_grant_type: Optional[str] = None,
        oidc_issuer: str = "https://oidc.test",
        expected_client_id: str = "myclient",
        expected_fields: dict = None,
        state: dict = None,
        scopes_supported: List[str] = None,
        device_code_flow_support: bool = True,
        oidc_discovery_url: Optional[str] = None,
        support_verification_uri_complete: bool = False,
    ):
        """
        Register the mocked discovery document, token endpoint
        and (when enabled) device code endpoint on the given requests mocker.
        """
        self.requests_mock = requests_mock
        self.oidc_issuer = oidc_issuer
        self.expected_grant_type = expected_grant_type
        self.expected_client_id = expected_client_id
        self.expected_fields = expected_fields or {}
        self.expected_authorization_code = None
        self.grant_request_history = []
        self.state = state or {}
        self.scopes_supported = scopes_supported or ["openid", "email", "profile"]
        self.support_verification_uri_complete = support_verification_uri_complete
        self.mocks = {}

        self.authorization_endpoint = url_join(self.oidc_issuer, "/auth")
        self.token_endpoint = url_join(self.oidc_issuer, "/token")
        if device_code_flow_support:
            self.device_code_endpoint = url_join(self.oidc_issuer, "/device_code")
        else:
            self.device_code_endpoint = None

        if not oidc_discovery_url:
            oidc_discovery_url = url_join(oidc_issuer, "/.well-known/openid-configuration")
        # Rudimentary OpenID Connect discovery document
        discovery_doc = dict_no_none(
            {
                "issuer": self.oidc_issuer,
                "authorization_endpoint": self.authorization_endpoint,
                "token_endpoint": self.token_endpoint,
                "device_authorization_endpoint": self.device_code_endpoint,
                "scopes_supported": self.scopes_supported,
            }
        )
        self.mocks["oidc_discovery"] = self.requests_mock.get(oidc_discovery_url, text=json.dumps(discovery_doc))
        self.mocks["token_endpoint"] = self.requests_mock.post(self.token_endpoint, text=self.token_callback)

        if self.device_code_endpoint:
            self.mocks["device_code_endpoint"] = self.requests_mock.post(
                self.device_code_endpoint, text=self.device_code_callback
            )

    def webbrowser_open(self, url: str):
        """Doing fake browser and Oauth Provider handling here"""
        assert url.startswith(self.authorization_endpoint)
        query = self._get_query_params(url=url)
        assert query["client_id"] == self.expected_client_id
        assert query["response_type"] == "code"
        assert query["scope"] == self.expected_fields["scope"]
        # Remember the request fields, to be checked in the token request later.
        for field in ("state", "nonce", "code_challenge", "redirect_uri", "scope"):
            self.state[field] = query[field]
        redirect_uri = query["redirect_uri"]
        # Don't mock the request to the redirect URI (it is hosted by the temporary web server in separate thread)
        self.requests_mock.get(redirect_uri, real_http=True)
        self.expected_authorization_code = "6uthc0d3"
        requests.get(
            redirect_uri,
            params={"state": query["state"], "code": self.expected_authorization_code},
        )

    def token_callback(self, request: requests_mock.request._RequestObjectProxy, context):
        """Token endpoint handler: dispatch on grant type and keep track of request/response history."""
        params = self._get_query_params(query=request.text)
        grant_type = params["grant_type"]
        history_entry = {"grant_type": grant_type}
        self.grant_request_history.append(history_entry)
        if self.expected_grant_type:
            assert grant_type == self.expected_grant_type
        handlers = {
            "authorization_code": self.token_callback_authorization_code,
            "client_credentials": self.token_callback_client_credentials,
            "password": self.token_callback_resource_owner_password_credentials,
            "urn:ietf:params:oauth:grant-type:device_code": self.token_callback_device_code,
            "refresh_token": self.token_callback_refresh_token,
        }
        result = handlers[grant_type](params=params, context=context)
        # Store the decoded response when it is JSON, the raw response otherwise.
        try:
            history_entry["response"] = json.loads(result)
        except json.JSONDecodeError:
            history_entry["response"] = result
        return result

    def token_callback_authorization_code(self, params: dict, context):
        """Fake code to token exchange by Oauth Provider"""
        assert params["client_id"] == self.expected_client_id
        assert params["grant_type"] == "authorization_code"
        assert self.state["code_challenge"] == PkceCode.sha256_hash(params["code_verifier"])
        assert params["code"] == self.expected_authorization_code
        assert params["redirect_uri"] == self.state["redirect_uri"]
        return self._build_token_response()

    def token_callback_client_credentials(self, params: dict, context):
        """Check a client credentials request and respond with just an access token."""
        expected = self.expected_fields
        assert params["client_id"] == self.expected_client_id
        assert params["grant_type"] == "client_credentials"
        assert params["scope"] == expected["scope"]
        assert params["client_secret"] == expected["client_secret"]
        return self._build_token_response(include_id_token=False, include_refresh_token=False)

    def token_callback_resource_owner_password_credentials(self, params: dict, context):
        """Check a resource owner password credentials request and respond with tokens."""
        assert params["client_id"] == self.expected_client_id
        assert params["grant_type"] == "password"
        for field in ("client_secret", "username", "password", "scope"):
            assert params[field] == self.expected_fields[field]
        return self._build_token_response()

    def device_code_callback(self, request: requests_mock.request._RequestObjectProxy, context):
        """Device code endpoint handler: generate device/user code and build the verification info response."""
        params = self._get_query_params(query=request.text)
        assert params["client_id"] == self.expected_client_id
        assert params["scope"] == self.expected_fields["scope"]
        self.state["device_code"] = random_string()
        self.state["user_code"] = random_string(length=6).upper()
        self.state["scope"] = params["scope"]
        # Only check the PKCE code challenge when there is an explicit expectation about it.
        if "code_challenge" in self.expected_fields:
            expect_code_challenge = self.expected_fields.get("code_challenge")
            if expect_code_challenge in [True]:
                assert "code_challenge" in params
                self.state["code_challenge"] = params["code_challenge"]
            elif expect_code_challenge in [False, ABSENT]:
                assert "code_challenge" not in params
            else:
                raise ValueError(expect_code_challenge)

        verification_uri = url_join(self.oidc_issuer, "/dc")
        response = {
            # TODO: also verification_url (google tweak)
            "verification_uri": verification_uri,
            "device_code": self.state["device_code"],
            "user_code": self.state["user_code"],
            "interval": DEVICE_CODE_POLL_INTERVAL,
        }
        if self.support_verification_uri_complete:
            response["verification_uri_complete"] = verification_uri + f"?user_code={self.state['user_code']}"
        return json.dumps(response)

    def token_callback_device_code(self, params: dict, context):
        """
        Check a device code token request and respond according to the next entry
        of the "device_code_callback_timeline" in the state.
        """
        assert params["client_id"] == self.expected_client_id
        expected_client_secret = self.expected_fields.get("client_secret")
        if expected_client_secret:
            assert params["client_secret"] == expected_client_secret
        else:
            assert "client_secret" not in params
        expect_code_verifier = self.expected_fields.get("code_verifier")
        if expect_code_verifier in [True]:
            assert PkceCode.sha256_hash(params["code_verifier"]) == self.state["code_challenge"]
            self.state["code_verifier"] = params["code_verifier"]
        elif expect_code_verifier in [False, None, ABSENT]:
            assert "code_verifier" not in params
            assert "code_challenge" not in self.state
        else:
            raise ValueError(expect_code_verifier)
        assert params["device_code"] == self.state["device_code"]
        assert params["grant_type"] == "urn:ietf:params:oauth:grant-type:device_code"
        # Fail with pending/too fast?
        try:
            outcome = self.state["device_code_callback_timeline"].pop(0)
        except Exception:
            outcome = "rest in peace"
        if outcome != "great success":
            context.status_code = 400
            return json.dumps({"error": outcome})
        return self._build_token_response()

    def token_callback_refresh_token(self, params: dict, context):
        """Check a refresh token request: 401 on an unexpected refresh token, an access token otherwise."""
        assert params["client_id"] == self.expected_client_id
        assert params["grant_type"] == "refresh_token"
        expected = self.expected_fields
        if "client_secret" in expected:
            assert params["client_secret"] == expected["client_secret"]
        if params["refresh_token"] != expected["refresh_token"]:
            context.status_code = 401
            return json.dumps({"error": "invalid refresh token"})
        assert params["refresh_token"] == expected["refresh_token"]
        return self._build_token_response(include_id_token=False, include_refresh_token=False)

    @staticmethod
    def _get_query_params(*, url=None, query=None):
        """Helper to extract query params from an url or query string"""
        query_string = query if query else urllib.parse.urlparse(url).query
        multi_valued = urllib.parse.parse_qs(query_string)
        # Each parameter is expected to occur exactly once.
        assert all(len(values) == 1 for values in multi_valued.values())
        return {param: values[0] for param, values in multi_valued.items()}

    @staticmethod
    def _jwt_encode(header: dict, payload: dict, signature="s1gn6tur3"):
        """Poor man's JWT encoding (just for unit testing purposes)"""

        def encode(d):
            raw = json.dumps(d).encode("ascii")
            return base64.urlsafe_b64encode(raw).decode("ascii").replace("=", "")

        return f"{encode(header)}.{encode(payload)}.{signature}"

    def _build_token_response(
        self,
        sub="123",
        name="john",
        include_id_token=True,
        include_refresh_token: Optional[bool] = None,
    ) -> str:
        """Build JSON serialized access/id/refresh token response (and store tokens for use in assertions)"""
        claims = dict_no_none(
            sub=sub,
            name=name,
            nonce=self.state.get("nonce"),
            _uuid=uuid.uuid4().hex,
        )
        access_token = self._jwt_encode(header={}, payload=claims)
        tokens = {"access_token": access_token}

        # Attempt to simulate real world refresh token support.
        if include_refresh_token is None:
            if "offline_access" in self.scopes_supported:
                # "offline_access" scope as suggested in spec
                # (https://openid.net/specs/openid-connect-core-1_0.html#OfflineAccess)
                # Implemented by Microsoft, EGI Check-in
                requested_scopes = self.state.get("scope", "").split(" ")
                include_refresh_token = "offline_access" in requested_scopes
            else:
                # Google OAuth style: no support for "offline_access", return refresh token automatically?
                include_refresh_token = True
        if include_refresh_token:
            tokens["refresh_token"] = self._jwt_encode(
                header={}, payload={"foo": "refresh", "_uuid": uuid.uuid4().hex}
            )
        if include_id_token:
            tokens["id_token"] = access_token
        self.state.update(tokens, name=name, sub=sub)
        return json.dumps(tokens)

    def validate_access_token(self, access_token: str):
        """Return user info when the access token matches the one in the state, raise ``LookupError`` otherwise."""
        if access_token != self.state["access_token"]:
            raise LookupError("Invalid access token")
        return {"user_id": self.state["name"], "sub": self.state["sub"]}

    def invalidate_access_token(self):
        """Overwrite the stored access token, so that validation of the issued one fails."""
        self.state["access_token"] = "***invalidated***"

    def get_request_history(
        self, url: Optional[str] = None, method: Optional[str] = None
    ) -> List[requests_mock.request._RequestObjectProxy]:
        """Get mocked request history: requests with given method/url."""
        if url and url.startswith("/"):
            url = url_join(self.oidc_issuer, url)

        def matches(request) -> bool:
            if method is not None and method.lower() != request.method.lower():
                return False
            return url is None or url == request.url

        return [r for r in self.requests_mock.request_history if matches(r)]
+""" +from __future__ import annotations + +import datetime +import json +import logging +import os +import shlex +import sys +import warnings +from collections import OrderedDict +from pathlib import Path, PurePosixPath +from typing import ( + Any, + Callable, + Dict, + Iterable, + Iterator, + List, + Optional, + Sequence, + Set, + Tuple, + Union, +) + +import requests +import shapely.geometry.base +from requests import Response +from requests.auth import AuthBase, HTTPBasicAuth + +import openeo +from openeo.capabilities import ApiVersionException, ComparableVersion +from openeo.config import config_log, get_config_option +from openeo.internal.documentation import openeo_process +from openeo.internal.graph_building import FlatGraphableMixin, PGNode, as_flat_graph +from openeo.internal.jupyter import VisualDict, VisualList +from openeo.internal.processes.builder import ProcessBuilderBase +from openeo.internal.warnings import deprecated, legacy_alias +from openeo.metadata import ( + Band, + BandDimension, + CollectionMetadata, + SpatialDimension, + TemporalDimension, +) +from openeo.rest import ( + DEFAULT_DOWNLOAD_CHUNK_SIZE, + CapabilitiesException, + OpenEoApiError, + OpenEoApiPlainError, + OpenEoClientException, + OpenEoRestError, +) +from openeo.rest._datacube import _ProcessGraphAbstraction, build_child_callback +from openeo.rest.auth.auth import BasicBearerAuth, BearerAuth, NullAuth, OidcBearerAuth +from openeo.rest.auth.config import AuthConfig, RefreshTokenStore +from openeo.rest.auth.oidc import ( + DefaultOidcClientGrant, + GrantsChecker, + OidcAuthCodePkceAuthenticator, + OidcAuthenticator, + OidcClientCredentialsAuthenticator, + OidcClientInfo, + OidcDeviceAuthenticator, + OidcException, + OidcProviderInfo, + OidcRefreshTokenAuthenticator, + OidcResourceOwnerPasswordAuthenticator, +) +from openeo.rest.datacube import DataCube, InputDate +from openeo.rest.graph_building import CollectionProperty +from openeo.rest.job import BatchJob, RESTJob +from 
openeo.rest.mlmodel import MlModel +from openeo.rest.rest_capabilities import RESTCapabilities +from openeo.rest.service import Service +from openeo.rest.udp import Parameter, RESTUserDefinedProcess +from openeo.rest.userfile import UserFile +from openeo.rest.vectorcube import VectorCube +from openeo.util import ( + ContextTimer, + LazyLoadCache, + dict_no_none, + ensure_list, + load_json_resource, + repr_truncate, + rfc3339, + str_truncate, + url_join, +) + +_log = logging.getLogger(__name__) + +# Default timeouts for requests +# TODO: get default_timeout from config? +DEFAULT_TIMEOUT = 20 * 60 +DEFAULT_TIMEOUT_SYNCHRONOUS_EXECUTE = 30 * 60 + + +class RestApiConnection: + """Base connection class implementing generic REST API request functionality""" + + def __init__( + self, + root_url: str, + auth: Optional[AuthBase] = None, + session: Optional[requests.Session] = None, + default_timeout: Optional[int] = None, + slow_response_threshold: Optional[float] = None, + ): + self._root_url = root_url + self.auth = auth or NullAuth() + self.session = session or requests.Session() + self.default_timeout = default_timeout or DEFAULT_TIMEOUT + self.default_headers = { + "User-Agent": "openeo-python-client/{cv} {py}/{pv} {pl}".format( + cv=openeo.client_version(), + py=sys.implementation.name, pv=".".join(map(str, sys.version_info[:3])), + pl=sys.platform + ) + } + self.slow_response_threshold = slow_response_threshold + + @property + def root_url(self): + return self._root_url + + def build_url(self, path: str): + return url_join(self._root_url, path) + + def _merged_headers(self, headers: dict) -> dict: + """Merge default headers with given headers""" + result = self.default_headers.copy() + if headers: + result.update(headers) + return result + + def _is_external(self, url: str) -> bool: + """Check if given url is external (not under root url)""" + root = self.root_url.rstrip("/") + return not (url == root or url.startswith(root + '/')) + + def request( + self, + method: 
str, + path: str, + *, + params: Optional[dict] = None, + headers: Optional[dict] = None, + auth: Optional[AuthBase] = None, + check_error: bool = True, + expected_status: Optional[Union[int, Iterable[int]]] = None, + **kwargs, + ): + """Generic request send""" + url = self.build_url(path) + # Don't send default auth headers to external domains. + auth = auth or (self.auth if not self._is_external(url) else None) + slow_response_threshold = kwargs.pop("slow_response_threshold", self.slow_response_threshold) + if _log.isEnabledFor(logging.DEBUG): + _log.debug( + "Request `{m} {u}` with params {p}, headers {h}, auth {a}, kwargs {k}".format( + m=method.upper(), + u=url, + p=params, + h=headers and headers.keys(), + a=type(auth).__name__, + k=list(kwargs.keys()), + ) + ) + with ContextTimer() as timer: + resp = self.session.request( + method=method, + url=url, + params=params, + headers=self._merged_headers(headers), + auth=auth, + timeout=kwargs.pop("timeout", self.default_timeout), + **kwargs + ) + if slow_response_threshold and timer.elapsed() > slow_response_threshold: + _log.warning("Slow response: `{m} {u}` took {e:.2f}s (>{t:.2f}s)".format( + m=method.upper(), u=str_truncate(url, width=64), + e=timer.elapsed(), t=slow_response_threshold + )) + if _log.isEnabledFor(logging.DEBUG): + _log.debug( + f"openEO request `{resp.request.method} {resp.request.path_url}` -> response {resp.status_code} headers {resp.headers!r}" + ) + # Check for API errors and unexpected HTTP status codes as desired. 
+ status = resp.status_code + expected_status = ensure_list(expected_status) if expected_status else [] + if check_error and status >= 400 and status not in expected_status: + self._raise_api_error(resp) + if expected_status and status not in expected_status: + raise OpenEoRestError("Got status code {s!r} for `{m} {p}` (expected {e!r}) with body {body}".format( + m=method.upper(), p=path, s=status, e=expected_status, body=resp.text) + ) + return resp + + def _raise_api_error(self, response: requests.Response): + """Convert API error response to Python exception""" + status_code = response.status_code + try: + info = response.json() + except Exception: + info = None + + # Valid JSON object with "code" and "message" fields indicates a proper openEO API error. + if isinstance(info, dict): + error_code = info.get("code") + error_message = info.get("message") + if error_code and isinstance(error_code, str) and error_message and isinstance(error_message, str): + raise OpenEoApiError( + http_status_code=status_code, + code=error_code, + message=error_message, + id=info.get("id"), + url=info.get("url"), + ) + + # Failed to parse it as a compliant openEO API error: show body as-is in the exception. + text = response.text + error_message = None + _log.warning(f"Failed to parse API error response: [{status_code}] {text!r} (headers: {response.headers})") + + # TODO: eliminate this VITO-backend specific error massaging? + if status_code == 502 and "Proxy Error" in text: + error_message = ( + "Received 502 Proxy Error." + " This typically happens when a synchronous openEO processing request takes too long and is aborted." + " Consider using a batch job instead." + ) + + raise OpenEoApiPlainError(message=text, http_status_code=status_code, error_message=error_message) + + def get( + self, + path: str, + *, + params: Optional[dict] = None, + stream: bool = False, + auth: Optional[AuthBase] = None, + **kwargs, + ) -> Response: + """ + Do GET request to REST API. 
def post(self, path: str, json: Optional[dict] = None, **kwargs) -> Response:
    """
    Do POST request to REST API.

    Redirects are not followed (``allow_redirects=False`` is always passed).

    :param path: API path (without root url)
    :param json: Data (as dictionary) to be posted with JSON encoding
    :param kwargs: additional keyword arguments, passed on to :py:meth:`request`
    :return: response: Response
    """
    return self.request("post", path=path, json=json, allow_redirects=False, **kwargs)
+ :param default_timeout: Default timeout for requests in seconds. + :param auto_validate: toggle to automatically validate process graphs before execution + :param slow_response_threshold: Optional threshold in seconds + to consider a response as slow and log a warning. + :param auth_config: Optional :class:`AuthConfig` object + to fetch authentication related configuration from. + :param refresh_token_store: For advanced usage: + custom :class:`RefreshTokenStore` object + to use for storing/loading refresh tokens. + :param oidc_auth_renewer: For advanced usage: + optional :class:`OidcAuthenticator` object to use for renewing OIDC tokens. + :param auth: Optional ``requests.auth.AuthBase`` object to use for requests. + Usage of this parameter is deprecated, use the specific authentication methods instead. + """ + + _MINIMUM_API_VERSION = ComparableVersion("1.0.0") + + def __init__( + self, + url: str, + *, + session: Optional[requests.Session] = None, + default_timeout: Optional[int] = None, + auto_validate: bool = True, + slow_response_threshold: Optional[float] = None, + auth_config: Optional[AuthConfig] = None, + refresh_token_store: Optional[RefreshTokenStore] = None, + oidc_auth_renewer: Optional[OidcAuthenticator] = None, + auth: Optional[AuthBase] = None, + ): + if "://" not in url: + url = "https://" + url + self._orig_url = url + super().__init__( + root_url=self.version_discovery(url, session=session, timeout=default_timeout), + auth=auth, session=session, default_timeout=default_timeout, + slow_response_threshold=slow_response_threshold, + ) + self._capabilities_cache = LazyLoadCache() + + # Initial API version check. 
@classmethod
def version_discovery(
    cls, url: str, session: Optional[requests.Session] = None, timeout: Optional[int] = None
) -> str:
    """
    Do automatic openEO API version discovery from given url, using a "well-known URI" strategy.

    Versions below ``_MINIMUM_API_VERSION`` are ignored.
    Versions flagged as production-ready are preferred over non-production ones,
    and within that selection the highest API version wins.

    This is a best-effort lookup: on any failure (request error, unexpected document
    structure, no supported version listed) the given url is returned unchanged.

    :param url: initial backend url (not including "/.well-known/openeo")
    :param session: optional ``requests.Session`` object to use for the discovery request
    :param timeout: optional timeout (in seconds) for the discovery request
    :return: root url of highest supported backend version
    """
    try:
        connection = RestApiConnection(url, session=session)
        response = connection.get("/.well-known/openeo", timeout=timeout)
        # Explicit checks instead of `assert`, which is stripped when running with `-O`.
        if response.status_code != 200:
            raise ValueError(f"Unexpected status code {response.status_code}")
        versions = response.json()["versions"]
        supported_versions = [v for v in versions if cls._MINIMUM_API_VERSION <= v["api_version"]]
        if not supported_versions:
            raise ValueError("No supported API versions listed")
        production_versions = [v for v in supported_versions if v.get("production", True)]
        # Compare as versions, not as strings: lexicographically "1.10.0" sorts below "1.2.0".
        highest_version = max(
            production_versions or supported_versions,
            key=lambda v: ComparableVersion(v["api_version"]),
        )
        _log.debug("Highest supported version available in backend: %s", highest_version)
        return highest_version["url"]
    except Exception as e:
        # Be very lenient about failing on the well-known URI strategy:
        # fall back on the original url, but leave a trace for debugging.
        _log.debug("Version discovery through well-known URI failed for %r: %r", url, e)
        return url
def _get_oidc_provider(
    self, provider_id: Union[str, None] = None, parse_info: bool = True
) -> Tuple[str, Union[OidcProviderInfo, None]]:
    """
    Resolve the OIDC provider to use, based on the backend's ``/credentials/oidc`` listing.

    An explicitly given ``provider_id`` is verified against the backend's providers.
    Without one, the provider is picked from (in order of precedence):
    the ``OPENEO_AUTH_PROVIDER_ID`` env var (if it matches a listed provider),
    the only listed provider, the only listed provider that also occurs in the auth config,
    or finally the first provider advertised by the backend.

    :param provider_id: id of OIDC provider as specified by backend (/credentials/oidc).
        Can be None if there is just one provider.
    :param parse_info: whether to parse the provider info into an :py:class:`OidcProviderInfo` object
        (which involves a ".well-known/openid-configuration" request)
    :return: resolved/verified provider_id and provider info object (unless ``parse_info`` is False)
    :raises OpenEoClientException: when the backend lists no providers
        or the requested provider is not available
    """
    listing = self.get("/credentials/oidc", expected_status=200).json()
    providers = OrderedDict((entry["id"], entry) for entry in listing["providers"])
    if not providers:
        raise OpenEoClientException("Backend lists no OIDC providers.")
    available_ids = list(providers.keys())
    _log.info("Found OIDC providers: {p}".format(p=available_ids))

    if provider_id:
        # Explicit request: only verify it.
        if provider_id not in providers:
            raise OpenEoClientException(
                "Requested OIDC provider {r!r} not available. Should be one of {p}.".format(
                    r=provider_id, p=available_ids
                )
            )
    else:
        # TODO: also support specifying provider through issuer URL?
        env_provider_id = os.environ.get("OPENEO_AUTH_PROVIDER_ID")
        if env_provider_id and env_provider_id in providers:
            _log.info(f"Using provider_id {env_provider_id!r} from OPENEO_AUTH_PROVIDER_ID env var")
            provider_id = env_provider_id
        elif len(available_ids) == 1:
            (provider_id,) = available_ids
            _log.info(
                f"No OIDC provider given, but only one available: {provider_id!r}. Using that one."
            )
        else:
            # Check if there is a single provider in the config to use.
            backend = self._orig_url
            configured = self._get_auth_config().get_oidc_provider_configs(backend=backend)
            candidates = set(configured.keys()).intersection(providers.keys())
            if len(candidates) == 1:
                (provider_id,) = candidates
                _log.info(
                    f"No OIDC provider given, but only one in config (for backend {backend!r}): {provider_id!r}. Using that one."
                )
            else:
                provider_id = available_ids[0]
                _log.info(
                    f"No OIDC provider given. Using first provider {provider_id!r} as advertised by backend."
                )

    provider = providers[provider_id]
    if parse_info:
        return provider_id, OidcProviderInfo.from_dict(provider)
    return provider_id, None
def _authenticate_oidc(
    self,
    authenticator: OidcAuthenticator,
    *,
    provider_id: str,
    store_refresh_token: bool = False,
    fallback_refresh_token_to_store: Optional[str] = None,
    oidc_auth_renewer: Optional[OidcAuthenticator] = None,
) -> Connection:
    """
    Obtain tokens through the given OIDC authenticator
    and set up bearer token auth (based on the OIDC access token) for further requests.

    :param authenticator: authenticator to get the tokens from
    :param provider_id: OIDC provider id to use in the bearer token
    :param store_refresh_token: whether to request a refresh token and store it in the refresh token store
    :param fallback_refresh_token_to_store: refresh token to store when no new one was obtained
    :param oidc_auth_renewer: authenticator to keep for later renewal of the access token.
        If not given and a refresh token is stored, a refresh token based authenticator is used.
    :return: this connection
    """
    tokens = authenticator.get_tokens(request_refresh_token=store_refresh_token)
    obtained = [name for name, value in tokens._asdict().items() if value]
    _log.info("Obtained tokens: {t}".format(t=obtained))

    renewer = oidc_auth_renewer
    if store_refresh_token:
        token_to_store = tokens.refresh_token or fallback_refresh_token_to_store
        if not token_to_store:
            _log.warning("No OIDC refresh token to store.")
        else:
            self._get_refresh_token_store().set_refresh_token(
                issuer=authenticator.provider_info.issuer,
                client_id=authenticator.client_id,
                refresh_token=token_to_store,
            )
            if not renewer:
                # No explicit renewer: renew later on with the refresh token just stored.
                renewer = OidcRefreshTokenAuthenticator(
                    client_info=authenticator.client_info, refresh_token=token_to_store
                )

    self.auth = OidcBearerAuth(provider_id=provider_id, access_token=tokens.access_token)
    self._oidc_auth_renewer = renewer
    return self
def authenticate_oidc_client_credentials(
    self,
    client_id: Optional[str] = None,
    client_secret: Optional[str] = None,
    provider_id: Optional[str] = None,
) -> Connection:
    """
    Authenticate with the OIDC Client Credentials flow.

    Client id, secret and provider id can be specified directly through the available arguments.
    It is also possible to leave these arguments empty and specify them through
    environment variables ``OPENEO_AUTH_CLIENT_ID``,
    ``OPENEO_AUTH_CLIENT_SECRET`` and ``OPENEO_AUTH_PROVIDER_ID`` respectively
    as discussed in :ref:`authenticate_oidc_client_credentials_env_vars`.

    :param client_id: client id to use
    :param client_secret: client secret to use
    :param provider_id: provider id to use
        Fallback value can be set through environment variable ``OPENEO_AUTH_PROVIDER_ID``.
    :return: this connection

    .. versionchanged:: 0.18.0 Allow specifying client id, secret and provider id through environment variables.
    """
    # TODO: option to get client id/secret from a config file too?
    if client_id is None:
        env = os.environ
        # Only fall back on the environment when both id and secret are available there.
        if all(name in env for name in ("OPENEO_AUTH_CLIENT_ID", "OPENEO_AUTH_CLIENT_SECRET")):
            client_id = env.get("OPENEO_AUTH_CLIENT_ID")
            client_secret = env.get("OPENEO_AUTH_CLIENT_SECRET")
            _log.debug(f"Getting client id ({client_id}) and secret from environment")

    provider_id, client_info = self._get_oidc_provider_and_client_info(
        provider_id=provider_id, client_id=client_id, client_secret=client_secret
    )
    credentials_authenticator = OidcClientCredentialsAuthenticator(client_info=client_info)
    # The same authenticator doubles as renewer: new access tokens are obtained by repeating the flow.
    return self._authenticate_oidc(
        credentials_authenticator,
        provider_id=provider_id,
        store_refresh_token=False,
        oidc_auth_renewer=credentials_authenticator,
    )
def authenticate_oidc_device(
    self,
    client_id: Optional[str] = None,
    client_secret: Optional[str] = None,
    provider_id: Optional[str] = None,
    *,
    store_refresh_token: bool = False,
    use_pkce: Optional[bool] = None,
    max_poll_time: float = OidcDeviceAuthenticator.DEFAULT_MAX_POLL_TIME,
    **kwargs,
) -> Connection:
    """
    Authenticate with the OIDC Device Code flow.

    :param client_id: client id to use instead of the default one
    :param client_secret: client secret to use instead of the default one
    :param provider_id: provider id to use.
        Fallback value can be set through environment variable ``OPENEO_AUTH_PROVIDER_ID``.
    :param store_refresh_token: whether to store the received refresh token automatically
    :param use_pkce: Use PKCE instead of client secret.
        If not set explicitly to `True` (use PKCE) or `False` (use client secret),
        it will be attempted to detect the best mode automatically.
        Note that PKCE for device code is not widely supported among OIDC providers.
    :param max_poll_time: maximum time in seconds to keep polling for successful authentication.
    :param kwargs: additional keyword arguments, passed on to the device code authenticator
    :return: this connection

    .. versionchanged:: 0.5.1 Add :py:obj:`use_pkce` argument
    .. versionchanged:: 0.17.0 Add :py:obj:`max_poll_time` argument
    .. versionchanged:: 0.19.0 Support fallback provider id through environment variable ``OPENEO_AUTH_PROVIDER_ID``.
    """

    def _allows_device_code(grants) -> bool:
        # A default client qualifies when it supports device code, with or without PKCE.
        return DefaultOidcClientGrant.DEVICE_CODE in grants or DefaultOidcClientGrant.DEVICE_CODE_PKCE in grants

    provider_id, client_info = self._get_oidc_provider_and_client_info(
        provider_id=provider_id,
        client_id=client_id,
        client_secret=client_secret,
        default_client_grant_check=_allows_device_code,
    )
    device_authenticator = OidcDeviceAuthenticator(
        client_info=client_info, use_pkce=use_pkce, max_poll_time=max_poll_time, **kwargs
    )
    return self._authenticate_oidc(
        device_authenticator, provider_id=provider_id, store_refresh_token=store_refresh_token
    )
+ Assuming you have set up a OIDC client (with a secret): + set ``OPENEO_AUTH_METHOD`` to ``client_credentials``, + set ``OPENEO_AUTH_CLIENT_ID`` to the client id, + and set ``OPENEO_AUTH_CLIENT_SECRET`` to the client secret. + + See :ref:`authenticate_oidc_automatic` for more details. + + :param provider_id: provider id to use + :param client_id: client id to use + :param client_secret: client secret to use + :param max_poll_time: maximum time in seconds to keep polling for successful authentication. + + .. versionadded:: 0.6.0 + .. versionchanged:: 0.17.0 Add :py:obj:`max_poll_time` argument + .. versionchanged:: 0.18.0 Add support for client credentials flow. + """ + # TODO: unify `os.environ.get` with `get_config_option`? + # TODO also support OPENEO_AUTH_CLIENT_ID, ... env vars for refresh token and device code auth? + + auth_method = os.environ.get("OPENEO_AUTH_METHOD") + if auth_method == "client_credentials": + _log.debug("authenticate_oidc: going for 'client_credentials' authentication") + return self.authenticate_oidc_client_credentials( + client_id=client_id, client_secret=client_secret, provider_id=provider_id + ) + elif auth_method: + raise ValueError(f"Unhandled auth method {auth_method}") + + _g = DefaultOidcClientGrant # alias for compactness + provider_id, client_info = self._get_oidc_provider_and_client_info( + provider_id=provider_id, client_id=client_id, client_secret=client_secret, + default_client_grant_check=lambda grants: ( + _g.REFRESH_TOKEN in grants and (_g.DEVICE_CODE in grants or _g.DEVICE_CODE_PKCE in grants) + ) + ) + + # Try refresh token first. 
def authenticate_oidc_access_token(self, access_token: str, provider_id: Optional[str] = None) -> Connection:
    """
    Set up authorization headers directly with an OIDC access token.

    :py:class:`Connection` provides multiple methods to handle various OIDC authentication flows end-to-end.
    If you already obtained a valid OIDC access token in another "out-of-band" way, you can use this method to
    set up the authorization headers appropriately.

    :param access_token: OIDC access token
    :param provider_id: id of the OIDC provider as listed by the openEO backend (``/credentials/oidc``).
        If not specified, a provider is resolved automatically
        (environment variable ``OPENEO_AUTH_PROVIDER_ID``, auth config,
        or the first provider advertised by the backend).
    :return: this connection (to support chaining)

    .. versionadded:: 0.31.0

    .. versionchanged:: 0.33.0
        Return connection object to support chaining.
    """
    # The provider id is always resolved/verified against the backend's provider listing.
    # The provider info itself is not parsed (``parse_info=False``).
    provider_id, _ = self._get_oidc_provider(provider_id=provider_id, parse_info=False)
    self.auth = OidcBearerAuth(provider_id=provider_id, access_token=access_token)
    # Drop any renewer left over from an earlier authentication:
    # there is nothing here to renew an out-of-band access token with.
    self._oidc_auth_renewer = None
    return self
def list_collection_ids(self) -> List[str]:
    """
    Collect the ids of all collections listed by the back-end.

    Collection entries without an "id" field are skipped.

    .. seealso::

        :py:meth:`~openeo.rest.connection.Connection.describe_collection`
        to get the metadata of a particular collection.

    :return: list of collection ids
    """
    collections = self.list_collections()
    return [metadata["id"] for metadata in collections if "id" in metadata]
def list_input_formats(self) -> dict:
    """
    Get the "input" section of the file formats listing.

    :return: the input formats, or an empty dict when the listing has no "input" entry
    """
    file_formats = self.list_file_formats()
    return file_formats.get("input", {})
def collection_items(
    self,
    name,
    spatial_extent: Optional[List[float]] = None,
    temporal_extent: Optional[List[Union[str, datetime.datetime]]] = None,
    limit: Optional[int] = None,
) -> Iterator[dict]:
    """
    Loads items for a specific image collection.
    May not be available for all collections.

    This is an experimental API and is subject to change.

    :param name: String Id of the collection
    :param spatial_extent: Limits the items to the given bounding box in WGS84:
        1. Lower left corner, coordinate axis 1
        2. Lower left corner, coordinate axis 2
        3. Upper right corner, coordinate axis 1
        4. Upper right corner, coordinate axis 2

    :param temporal_extent: Limits the items to the specified temporal interval.
        The interval has to be specified as an array with exactly two elements (start, end).
        Also supports open intervals by setting one of the boundaries to None, but never both.
    :param limit: The amount of items per request/page. If None, the back-end decides.

    :return: iterator as produced by ``paginate``,
        with each response page wrapped in a ``VisualDict``
    """
    url = '/collections/{}/items'.format(name)
    # Only add the query parameters that are actually specified.
    params = {}
    if spatial_extent:
        # Bounding box as comma separated coordinates.
        params["bbox"] = ",".join(str(c) for c in spatial_extent)
    if temporal_extent:
        # Interval as "start/end", with ".." marking an open (None) boundary.
        params["datetime"] = "/".join(".." if t is None else rfc3339.normalize(t) for t in temporal_extent)
    if limit is not None and limit > 0:
        # Non-positive limits are ignored.
        params['limit'] = limit

    return paginate(self, url, params, lambda response, page: VisualDict("items", data = response, parameters = {'show-map': True, 'heading': 'Page {} - Items'.format(page)}))
Setting this limit enables pagination. + + :return: job_list: Dict of all jobs of the user. + + .. versionadded:: 0.36.0 + Added ``limit`` argument + """ + # TODO: Parse the result so that Job classes returned? + resp = self.get("/jobs", params={"limit": limit}, expected_status=200).json() + if resp.get("federation:missing"): + _log.warning("Partial user job listing due to missing federation components: {c}".format( + c=",".join(resp["federation:missing"]) + )) + # TODO: when pagination is enabled: how to expose link to next page? + jobs = resp["jobs"] + return VisualList("data-table", data=jobs, parameters={'columns': 'jobs'}) + + def assert_user_defined_process_support(self): + """ + Capabilities document based verification that back-end supports user-defined processes. + + .. versionadded:: 0.23.0 + """ + if not self.capabilities().supports_endpoint("/process_graphs"): + raise CapabilitiesException("Backend does not support user-defined processes.") + + def save_user_defined_process( + self, user_defined_process_id: str, + process_graph: Union[dict, ProcessBuilderBase], + parameters: List[Union[dict, Parameter]] = None, + public: bool = False, + summary: Optional[str] = None, + description: Optional[str] = None, + returns: Optional[dict] = None, + categories: Optional[List[str]] = None, + examples: Optional[List[dict]] = None, + links: Optional[List[dict]] = None, + ) -> RESTUserDefinedProcess: + """ + Store a process graph and its metadata on the backend as a user-defined process for the authenticated user. + + :param user_defined_process_id: unique identifier for the user-defined process + :param process_graph: a process graph + :param parameters: a list of parameters + :param public: visible to other users? + :param summary: A short summary of what the process does. + :param description: Detailed description to explain the entity. CommonMark 0.29 syntax MAY be used for rich text representation. + :param returns: Description and schema of the return value. 
def list_user_defined_processes(self) -> List[dict]:
    """
    List the user-defined processes of the authenticated user.

    Back-end support is verified first with :py:meth:`assert_user_defined_process_support`.

    :return: the ``"processes"`` listing from the ``GET /process_graphs`` response
    """
    self.assert_user_defined_process_support()
    response = self.get("/process_graphs", expected_status=200)
    udp_listing = response.json()["processes"]
    return VisualList("processes", data=udp_listing, parameters={'show-graph': True, 'provide-download': False})
def vectorcube_from_paths(
    self, paths: List[str], format: str, options: Optional[dict] = None
) -> VectorCube:
    """
    Loads one or more files referenced by url or path that is accessible by the backend.

    :param paths: The files to read.
    :param format: The file format to read from. It must be one of the values that the server reports as supported input file formats.
    :param options: The file format parameters to be used to read the files. Must correspond to the parameters that the server reports as supported parameters for the chosen format.
        When omitted (or ``None``), an empty dictionary is used.

    :return: A :py:class:`VectorCube`.

    .. versionadded:: 0.14.0
    """
    # A `{}` default in the signature is a single dict shared by all calls and
    # stored in each process graph node, so a fresh one is built per call instead.
    if options is None:
        options = {}
    # TODO #457 deprecate this in favor of `load_url` and standard support for `load_uploaded_files`
    graph = PGNode(
        "load_uploaded_files",
        arguments=dict(paths=paths, format=format, options=options),
    )
    # TODO: load_uploaded_files might also return a raster data cube. Determine this based on format?
    return VectorCube(graph=graph, connection=self)
def datacube_from_flat_graph(self, flat_graph: dict, parameters: Optional[dict] = None) -> DataCube:
    """
    Construct a :py:class:`DataCube` from a flat dictionary representation of a process graph.

    .. seealso:: :ref:`datacube_from_json`, :py:meth:`~openeo.rest.connection.Connection.datacube_from_json`

    :param flat_graph: flat dictionary representation of a process graph
        or a process dictionary with such a flat process graph under a "process_graph" field
        (and optionally parameter metadata under a "parameters" field).
    :param parameters: Optional dictionary mapping parameter names to parameter values
        to use for parameters occurring in the process graph (e.g. as used in user-defined processes).
        The given dictionary is not modified.
    :return: A :py:class:`DataCube` corresponding with the operations encoded in the process graph
    """
    # Work on a copy: the declared defaults filled in below must not leak
    # into the dictionary owned by the caller.
    parameters = dict(parameters) if parameters else {}

    if "process_graph" in flat_graph:
        # `flat_graph` is a "process" structure
        # Extract defaults from declared parameters (explicitly given values win).
        for param in flat_graph.get("parameters") or []:
            if "default" in param:
                parameters.setdefault(param["name"], param["default"])

        flat_graph = flat_graph["process_graph"]

    pgnode = PGNode.from_flat_graph(flat_graph=flat_graph, parameters=parameters)
    return DataCube(graph=pgnode, connection=self)
seealso:: :ref:`datacube_from_json`, :py:meth:`~openeo.rest.connection.Connection.datacube_from_flat_graph` + + :param src: raw JSON string, URL to JSON resource or path to local JSON file + :param parameters: Optional dictionary mapping parameter names to parameter values + to use for parameters occurring in the process graph (e.g. as used in user-defined processes) + :return: A :py:class:`DataCube` corresponding with the operations encoded in the process graph + """ + return self.datacube_from_flat_graph(load_json_resource(src), parameters=parameters) + + @openeo_process + def load_collection( + self, + collection_id: Union[str, Parameter], + spatial_extent: Union[Dict[str, float], Parameter, None] = None, + temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None, + bands: Union[None, List[str], Parameter] = None, + properties: Union[ + None, Dict[str, Union[str, PGNode, Callable]], List[CollectionProperty], CollectionProperty + ] = None, + max_cloud_cover: Optional[float] = None, + fetch_metadata: bool = True, + ) -> DataCube: + """ + Load a DataCube by collection id. + + :param collection_id: image collection identifier + :param spatial_extent: limit data to specified bounding box or polygons + :param temporal_extent: limit data to specified temporal interval. + Typically, just a two-item list or tuple containing start and end date. + See :ref:`filtering-on-temporal-extent-section` for more details on temporal extent handling and shorthand notation. + :param bands: only add the specified bands. + :param properties: limit data by collection metadata property predicates. + See :py:func:`~openeo.rest.graph_building.collection_property` for easy construction of such predicates. + :param max_cloud_cover: shortcut to set maximum cloud cover ("eo:cloud_cover" collection property) + :return: a datacube containing the requested data + + .. versionadded:: 0.13.0 + added the ``max_cloud_cover`` argument. + + .. 
versionchanged:: 0.23.0 + Argument ``temporal_extent``: add support for year/month shorthand notation + as discussed at :ref:`date-shorthand-handling`. + + .. versionchanged:: 0.26.0 + Add :py:func:`~openeo.rest.graph_building.collection_property` support to ``properties`` argument. + """ + return DataCube.load_collection( + collection_id=collection_id, + connection=self, + spatial_extent=spatial_extent, + temporal_extent=temporal_extent, + bands=bands, + properties=properties, + max_cloud_cover=max_cloud_cover, + fetch_metadata=fetch_metadata, + ) + + # TODO: remove this #100 #134 0.4.10 + imagecollection = legacy_alias( + load_collection, name="imagecollection", since="0.4.10" + ) + + @openeo_process + def load_result( + self, + id: str, + spatial_extent: Optional[Dict[str, float]] = None, + temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None, + bands: Optional[List[str]] = None, + ) -> DataCube: + """ + Loads batch job results by job id from the server-side user workspace. + The job must have been stored by the authenticated user on the back-end currently connected to. + + :param id: The id of a batch job with results. + :param spatial_extent: limit data to specified bounding box or polygons + :param temporal_extent: limit data to specified temporal interval. + Typically, just a two-item list or tuple containing start and end date. + See :ref:`filtering-on-temporal-extent-section` for more details on temporal extent handling and shorthand notation. + :param bands: only add the specified bands + + :return: a :py:class:`DataCube` + + .. versionchanged:: 0.23.0 + Argument ``temporal_extent``: add support for year/month shorthand notation + as discussed at :ref:`date-shorthand-handling`. + """ + # TODO: add check that back-end supports `load_result` process? 
+ cube = self.datacube_from_process( + process_id="load_result", + id=id, + **dict_no_none( + spatial_extent=spatial_extent, + temporal_extent=temporal_extent and DataCube._get_temporal_extent(extent=temporal_extent), + bands=bands, + ), + ) + return cube + + @openeo_process + def load_stac( + self, + url: str, + spatial_extent: Union[Dict[str, float], Parameter, None] = None, + temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None, + bands: Optional[List[str]] = None, + properties: Optional[Dict[str, Union[str, PGNode, Callable]]] = None, + ) -> DataCube: + """ + Loads data from a static STAC catalog or a STAC API Collection and returns the data as a processable :py:class:`DataCube`. + A batch job result can be loaded by providing a reference to it. + + If supported by the underlying metadata and file format, the data that is added to the data cube can be + restricted with the parameters ``spatial_extent``, ``temporal_extent`` and ``bands``. + If no data is available for the given extents, a ``NoDataAvailable`` error is thrown. + + Remarks: + + * The bands (and all dimensions that specify nominal dimension labels) are expected to be ordered as + specified in the metadata if the ``bands`` parameter is set to ``null``. + * If no additional parameter is specified this would imply that the whole data set is expected to be loaded. + Due to the large size of many data sets, this is not recommended and may be optimized by back-ends to only + load the data that is actually required after evaluating subsequent processes such as filters. + This means that the values should be processed only after the data has been limited to the required extent + and as a consequence also to a manageable size. + + + :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) + or a specific STAC API Collection that allows to filter items and to download assets. + This includes batch job results, which itself are compliant to STAC. 
+ For external URLs, authentication details such as API keys or tokens may need to be included in the URL. + + Batch job results can be specified in two ways: + + - For Batch job results at the same back-end, a URL pointing to the corresponding batch job results + endpoint should be provided. The URL usually ends with ``/jobs/{id}/results`` and ``{id}`` + is the corresponding batch job ID. + - For external results, a signed URL must be provided. Not all back-ends support signed URLs, + which are provided as a link with the link relation `canonical` in the batch job result metadata. + :param spatial_extent: + Limits the data to load to the specified bounding box or polygons. + + For raster data, the process loads the pixel into the data cube if the point at the pixel center intersects + with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). + + For vector data, the process loads the geometry into the data cube if the geometry is fully within the + bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). + Empty geometries may only be in the data cube if no spatial extent has been provided. + + The GeoJSON can be one of the following feature types: + + * A ``Polygon`` or ``MultiPolygon`` geometry, + * a ``Feature`` with a ``Polygon`` or ``MultiPolygon`` geometry, or + * a ``FeatureCollection`` containing at least one ``Feature`` with ``Polygon`` or ``MultiPolygon`` geometries. + + Set this parameter to ``None`` to set no limit for the spatial extent. + Be careful with this when loading large datasets. It is recommended to use this parameter instead of + using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data. + + :param temporal_extent: + Limits the data to load to the specified left-closed temporal interval. + Applies to all temporal dimensions. + The interval has to be specified as an array with exactly two elements: + + 1. 
The first element is the start of the temporal interval. + The specified instance in time is **included** in the interval. + 2. The second element is the end of the temporal interval. + The specified instance in time is **excluded** from the interval. + + The second element must always be greater/later than the first element. + Otherwise, a `TemporalExtentEmpty` exception is thrown. + + Also supports open intervals by setting one of the boundaries to ``None``, but never both. + + Set this parameter to ``None`` to set no limit for the temporal extent. + Be careful with this when loading large datasets. It is recommended to use this parameter instead of + using ``filter_temporal()`` directly after loading unbounded data. + + :param bands: + Only adds the specified bands into the data cube so that bands that don't match the list + of band names are not available. Applies to all dimensions of type `bands`. + + Either the unique band name (metadata field ``name`` in bands) or one of the common band names + (metadata field ``common_name`` in bands) can be specified. + If the unique band name and the common name conflict, the unique band name has a higher priority. + + The order of the specified array defines the order of the bands in the data cube. + If multiple bands match a common name, all matched bands are included in the original order. + + It is recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded data. + + :param properties: + Limits the data by metadata properties to include only data in the data cube which + all given conditions return ``True`` for (AND operation). + + Specify key-value-pairs with the key being the name of the metadata property, + which can be retrieved with the openEO Data Discovery for Collections. + The value must be a condition (user-defined process) to be evaluated against a STAC API. + This parameter is not supported for static STAC. + + .. versionadded:: 0.17.0 + + .. 
def load_stac_from_job(
    self,
    job: Union[BatchJob, str],
    spatial_extent: Union[Dict[str, float], Parameter, None] = None,
    temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
    bands: Optional[List[str]] = None,
    properties: Optional[Dict[str, Union[str, PGNode, Callable]]] = None,
) -> DataCube:
    """
    Convenience function to load the results of a finished openEO job
    (as a STAC collection) through :py:meth:`load_stac` in a new openEO process graph.

    The "canonical" link from the job results metadata is preferred;
    when it is missing or can not be retrieved, the job results URL is used instead.

    :param job: a :py:class:`~openeo.rest.job.BatchJob` or job id pointing to a finished job.
        Note that the :py:class:`~openeo.rest.job.BatchJob` approach allows to point
        to a batch job on a different back-end.
    :param spatial_extent: limit data to specified bounding box or polygons
    :param temporal_extent: limit data to specified temporal interval.
    :param bands: limit data to the specified bands
    :param properties: passed on as-is to :py:meth:`load_stac`
    :raises ValueError: if ``job`` is neither a job id string nor a ``BatchJob``

    .. versionadded:: 0.30.0
    """
    # TODO #634 add option to require or avoid the canonical link
    if not isinstance(job, (str, BatchJob)):
        raise ValueError("job must be a BatchJob or job id")
    if isinstance(job, str):
        job = BatchJob(job_id=job, connection=self)

    try:
        result_links = job.get_results().get_metadata().get("links", [])
        canonical_links = []
        for link in result_links:
            if link.get("rel") == "canonical" and "href" in link:
                canonical_links.append(link["href"])

        if not canonical_links:
            _log.warning("No canonical link found in job results metadata. Using job results URL instead.")
            stac_link = job.get_results_metadata_url(full=True)
        elif len(canonical_links) > 1:
            _log.warning(
                f"Multiple canonical links found in job results metadata: {canonical_links}. Picking first one."
            )
            stac_link = canonical_links[0]
        else:
            stac_link = canonical_links[0]
    except OpenEoApiError as e:
        _log.warning(f"Failed to get the canonical job results: {e!r}. Using job results URL instead.")
        stac_link = job.get_results_metadata_url(full=True)

    return self.load_stac(
        url=stac_link,
        spatial_extent=spatial_extent,
        temporal_extent=temporal_extent,
        bands=bands,
        properties=properties,
    )
def create_service(self, graph: dict, type: str, **kwargs) -> Service:
    """
    Create a secondary web service from a process graph.

    The request body is built with ``_build_request_with_process_graph``,
    goes through preflight validation and is then posted to ``/services``.

    :param graph: process graph to publish as a service
    :param type: service type, set as top-level ``type`` property of the request body
    :param kwargs: additional top-level properties of the request body
    :return: a :py:class:`Service` built from the ``OpenEO-Identifier`` response header
    """
    # TODO: type hint for graph: is it a nested or a flat one?
    request_body = self._build_request_with_process_graph(process_graph=graph, type=type, **kwargs)
    self._preflight_validation(pg_with_metadata=request_body)
    created = self.post(path="/services", json=request_body, expected_status=201)
    return Service(created.headers.get("OpenEO-Identifier"), self)
def list_files(self) -> List[UserFile]:
    """
    List the user-uploaded files in the user workspace on the back-end.

    :return: the user-uploaded files, one :py:class:`UserFile` per entry
        of the ``"files"`` listing of the ``GET /files`` response.
    """
    listing = self.get('/files', expected_status=200).json()['files']
    user_files = []
    for file_metadata in listing:
        user_files.append(UserFile.from_metadata(metadata=file_metadata, connection=self))
    return VisualList("data-table", data=user_files, parameters={'columns': 'files'})
def _build_request_with_process_graph(
    self,
    process_graph: Union[dict, FlatGraphableMixin, str, Path, List[FlatGraphableMixin]],
    additional: Optional[dict] = None,
    job_options: Optional[dict] = None,
    **kwargs,
) -> dict:
    """
    Prepare a json payload with a process graph to submit to /result, /services, /jobs, ...

    :param process_graph: process graph, or anything that ``as_flat_graph`` can convert to one
    :param additional: additional (top-level) properties to merge into the payload
    :param job_options: dictionary of job options, stored under top-level property "job_options"
    :param kwargs: further top-level properties of the payload (e.g. ``title``, ``type``)
    :return: flat dict representing a "process graph with metadata" ({"process": {"process_graph": ...}, ...})
    :raises OpenEoClientException: when the process graph is bound to another connection,
        or when "job_options" is given both explicitly and as a top-level property.
    """
    # TODO: make this a more general helper (like `as_flat_graph`)
    connections = extract_connections(process_graph)
    if any(c != self for c in connections):
        raise OpenEoClientException(f"Mixing different connections: {self} and {connections}.")
    result = kwargs

    if additional:
        result.update(additional)
    if job_options is not None:
        # Note: this "job_options" top-level property is not in official openEO API spec,
        # but a commonly used convention, e.g. in openeo-python-driver based deployments.
        # Explicit check instead of `assert`: assertions are stripped under `python -O`,
        # which would silently overwrite the conflicting value.
        if "job_options" in result:
            raise OpenEoClientException(
                'Conflicting "job_options": given both as argument and as top-level property.'
            )
        result["job_options"] = job_options

    process_graph = as_flat_graph(process_graph)
    if "process_graph" not in process_graph:
        process_graph = {"process_graph": process_graph}
    # TODO: also check if `process_graph` already has "process" key (i.e. is a "process graph with metadata" already)
    result["process"] = process_graph
    return result
as produced by `_build_request_with_process_graph` + :param validate: Optional toggle to enable/prevent validation of the process graphs before execution + (overruling the connection's ``auto_validate`` setting). + + :return: + """ + if validate is None: + validate = self._auto_validate + if validate and self.capabilities().supports_endpoint("/validation", "POST"): + # At present, the intention is that a failed validation does not block + # the job from running, it is only reported as a warning. + # Therefor we also want to continue when something *else* goes wrong + # *during* the validation. + try: + resp = self.post(path="/validation", json=pg_with_metadata["process"], expected_status=200) + validation_errors = resp.json()["errors"] + if validation_errors: + _log.warning( + "Preflight process graph validation raised: " + + (" ".join(f"[{e.get('code')}] {e.get('message')}" for e in validation_errors)) + ) + except Exception as e: + _log.error(f"Preflight process graph validation failed: {e}") + + # TODO: additional validation and sanity checks: e.g. is there a result node, are all process_ids valid, ...? + + # TODO: unify `download` and `execute` better: e.g. `download` always writes to disk, `execute` returns result (raw or as JSON decoded dict) + def download( + self, + graph: Union[dict, FlatGraphableMixin, str, Path, List[FlatGraphableMixin]], + outputfile: Union[Path, str, None] = None, + *, + timeout: Optional[int] = None, + validate: Optional[bool] = None, + chunk_size: int = DEFAULT_DOWNLOAD_CHUNK_SIZE, + additional: Optional[dict] = None, + job_options: Optional[dict] = None, + ) -> Union[None, bytes]: + """ + Downloads the result of a process graph synchronously, + and save the result to the given file or return bytes object if no outputfile is specified. + This method is useful to export binary content such as images. For json content, the execute method is recommended. 
def execute(
    self,
    process_graph: Union[dict, FlatGraphableMixin, str, Path, List[FlatGraphableMixin]],
    *,
    timeout: Optional[int] = None,
    validate: Optional[bool] = None,
    auto_decode: bool = True,
    additional: Optional[dict] = None,
    job_options: Optional[dict] = None,
) -> Union[dict, requests.Response]:
    """
    Run a process graph synchronously (``POST /result``) and return the result,
    by default decoded from JSON.

    :param process_graph: (flat) dict representing a process graph, or process graph as raw JSON string,
        or as local file path or URL
    :param timeout: timeout to wait for the response; a falsy value selects the default for synchronous execution
    :param validate: Optional toggle to enable/prevent validation of the process graphs before execution
        (overruling the connection's ``auto_validate`` setting).
    :param auto_decode: Boolean flag to enable/disable automatic JSON decoding of the response. Defaults to True.
    :param additional: additional (top-level) properties to set in the request body
    :param job_options: dictionary of job options to pass to the backend
        (under top-level property "job_options")

    :return: parsed JSON response as a dict if auto_decode is True, otherwise response object
    :raises OpenEoClientException: if ``auto_decode`` is enabled and the response is not valid JSON

    .. versionadded:: 0.36.0
        Added arguments ``additional`` and ``job_options``.
    """
    request_body = self._build_request_with_process_graph(
        process_graph=process_graph, additional=additional, job_options=job_options
    )
    self._preflight_validation(pg_with_metadata=request_body, validate=validate)
    response = self.post(
        path="/result",
        json=request_body,
        expected_status=200,
        timeout=timeout or DEFAULT_TIMEOUT_SYNCHRONOUS_EXECUTE,
    )
    if not auto_decode:
        # Caller wants the raw response object.
        return response
    try:
        return response.json()
    except requests.exceptions.JSONDecodeError as e:
        raise OpenEoClientException(
            "Failed to decode response as JSON. For other data types use `download` method instead of `execute`."
        ) from e
+ + :param process_graph: openEO-style (flat) process graph representation, + or an object that can be converted to such a representation: + a dictionary, a :py:class:`~openeo.rest.datacube.DataCube` object, + a string with a JSON representation, + a local file path or URL to a JSON representation, + a :py:class:`~openeo.rest.multiresult.MultiResult` object, ... + :param title: job title + :param description: job description + :param plan: The billing plan to process and charge the job with + :param budget: Maximum budget to be spent on executing the job. + Note that some backends do not honor this limit. + :param additional: additional (top-level) properties to set in the request body + :param job_options: dictionary of job options to pass to the backend + (under top-level property "job_options") + :param validate: Optional toggle to enable/prevent validation of the process graphs before execution + (overruling the connection's ``auto_validate`` setting). + :return: Created job + + .. versionchanged:: 0.35.0 + Add :ref:`multi-result support `. + + .. versionadded:: 0.36.0 + Added argument ``job_options``. + """ + # TODO move all this (BatchJob factory) logic to BatchJob? 
def service(self, service_id: str) -> Service:
    """
    Get the secondary web service based on the id. The service with the given id should already exist.

    Use :py:meth:`openeo.rest.connection.Connection.create_service` to create new services

    :param service_id: the service id of an existing secondary web service
    :return: A service object.
    """
    return Service(service_id, connection=self)
+ + This is backed by a non-standard process ('load_disk_data'). This will eventually be replaced by standard options such as + :py:meth:`openeo.rest.connection.Connection.load_stac` or https://processes.openeo.org/#load_uploaded_files + + :param format: the file format, e.g. 'GTiff' + :param glob_pattern: a glob pattern that matches the files to load from disk + :param options: options specific to the file format + """ + return DataCube.load_disk_collection( + self, format, glob_pattern, **(options or {}) + ) + + def as_curl( + self, + data: Union[dict, DataCube, FlatGraphableMixin], + *, + path="/result", + method="POST", + obfuscate_auth: bool = False, + additional: Optional[dict] = None, + job_options: Optional[dict] = None, + ) -> str: + """ + Build curl command to evaluate given process graph or data cube + (including authorization and content-type headers). + + >>> print(connection.as_curl(cube)) + curl -i -X POST -H 'Content-Type: application/json' -H 'Authorization: Bearer ...' \\ + --data '{"process":{"process_graph":{...}}' \\ + https://openeo.example/openeo/1.1/result + + :param data: something that is convertable to an openEO process graph: a dictionary, + a :py:class:`~openeo.rest.datacube.DataCube` object, + a :py:class:`~openeo.processes.ProcessBuilder`, ... + :param path: endpoint to send request to: typically ``"/result"`` (default) for synchronous requests + or ``"/jobs"`` for batch jobs + :param method: HTTP method to use (typically ``"POST"``) + :param obfuscate_auth: don't show actual bearer token + :param additional: additional (top-level) properties to set in the request body + :param job_options: dictionary of job options to pass to the backend + (under top-level property "job_options") + + :return: curl command as a string + + .. versionadded:: 0.36.0 + Added arguments ``additional`` and ``job_options``. 
def connect(
    url: Optional[str] = None,
    *,
    auth_type: Optional[str] = None,
    auth_options: Optional[dict] = None,
    session: Optional[requests.Session] = None,
    default_timeout: Optional[int] = None,
    auto_validate: bool = True,
) -> Connection:
    """
    This method is the entry point to OpenEO.
    You typically create one connection object in your script or application
    and re-use it for all calls to that backend.

    If the backend requires authentication, you can pass authentication data directly to this function,
    but it could be easier to authenticate as follows:

        >>> # For basic authentication
        >>> conn = connect(url).authenticate_basic(username="john", password="foo")
        >>> # For OpenID Connect authentication
        >>> conn = connect(url).authenticate_oidc(client_id="myclient")

    :param url: The http url of the OpenEO back-end.
        When ``None``, the ``connection.default_backend`` config option is used as fallback.
    :param auth_type: Which authentication to use: None, "basic" or "oidc" (for OpenID Connect).
        Matching is case-insensitive; "openid" is accepted as alias for "oidc",
        and ``False``, "null" and "none" are handled like ``None`` (no authentication).
    :param auth_options: Options/arguments specific to the authentication type
        (passed as keyword arguments to ``authenticate_basic``/``authenticate_oidc``).
    :param session: optional :py:class:`requests.Session` object,
        passed on as-is to the :py:class:`Connection` constructor
    :param default_timeout: default timeout (in seconds) for requests
    :param auto_validate: toggle to automatically validate process graphs before execution
    :return: the (possibly already authenticated) connection
    :raises OpenEoClientException: when no URL is given and no default back-end is configured
    :raises ValueError: when ``auth_type`` is not one of the supported values

    .. versionadded:: 0.24.0
        added ``auto_validate`` argument
    """

    def _config_log(message):
        # Report config-driven decisions both through the module logger and the config logger.
        _log.info(message)
        config_log(message)

    if url is None:
        # No explicit URL: fall back on the default back-end from the config (if any).
        default_backend = get_config_option("connection.default_backend")
        if default_backend:
            url = default_backend
            _config_log(f"Using default back-end URL {url!r} (from config)")
            # The default back-end can come with its own auto-authentication setting,
            # which replaces whatever `auth_type` was passed.
            default_backend_auto_auth = get_config_option("connection.default_backend.auto_authenticate")
            if default_backend_auto_auth and default_backend_auto_auth.lower() in {"basic", "oidc"}:
                auth_type = default_backend_auto_auth.lower()
                _config_log(f"Doing auto-authentication {auth_type!r} (from config)")

    if auth_type is None:
        # Generic auto-authentication config: only consulted when no auth type is set yet.
        auto_authenticate = get_config_option("connection.auto_authenticate")
        if auto_authenticate and auto_authenticate.lower() in {"basic", "oidc"}:
            auth_type = auto_authenticate.lower()
            _config_log(f"Doing auto-authentication {auth_type!r} (from config)")

    if not url:
        raise OpenEoClientException("No openEO back-end URL given or known to connect to.")
    connection = Connection(url, session=session, default_timeout=default_timeout, auto_validate=auto_validate)

    # Normalize string values to lower case (non-string values like `None`/`False` are kept as-is).
    auth_type = auth_type.lower() if isinstance(auth_type, str) else auth_type
    if auth_type in {None, False, 'null', 'none'}:
        pass
    elif auth_type == "basic":
        connection.authenticate_basic(**(auth_options or {}))
    elif auth_type in {"oidc", "openid"}:
        connection.authenticate_oidc(**(auth_options or {}))
    else:
        raise ValueError("Unknown auth type {a!r}".format(a=auth_type))
    return connection
def paginate(con: Connection, url: str, params: Optional[dict] = None, callback: Callable = lambda resp, page: resp):
    """
    Generator that walks through a paginated resource by following ``rel="next"`` links.

    Each page is fetched with ``con.get(url, params=params)`` and decoded as JSON.
    The result of ``callback(response, page)`` is yielded per page,
    where ``page`` is the 1-based page counter.
    Iteration ends when a page has no link with ``rel == "next"`` and a ``href`` field.

    :param con: connection object to do the GET requests with
    :param url: URL of the first page
    :param params: query parameters for the first request only
        (follow-up requests use the "next" link as-is, with empty params)
    :param callback: function ``(response, page) -> item`` producing the value to yield for each page
        (by default the decoded response itself)
    """
    # TODO: make this a method `get_paginated` on `RestApiConnection`?
    # TODO: is it necessary to have `callback`? It's only used just before yielding,
    #   so it's probably cleaner (even for the caller) to to move it outside.
    page_number = 0
    while True:
        page_number += 1
        payload = con.get(url, params=params).json()
        yield callback(payload, page_number)
        next_hrefs = [
            link["href"] for link in payload.get("links", []) if link.get("rel") == "next" and "href" in link
        ]
        if not next_hrefs:
            return
        # Continue with the first "next" link, which is used as-is (no additional query parameters).
        url = next_hrefs[0]
        params = {}
def timeseries_json_to_pandas(timeseries: dict, index: str = "date", auto_collapse=True) -> pandas.DataFrame:
    """
    Convert a timeseries JSON object as returned by the `aggregate_spatial` process to a pandas DataFrame object

    This timeseries data has three dimensions in general: date, polygon index and band index.
    One of these will be used as index of the resulting dataframe (as specified by the `index` argument),
    and the other two will be used as multilevel columns.
    When there is just a single polygon or band in play, the dataframe will be simplified
    by removing the corresponding dimension if `auto_collapse` is enabled (on by default).
    The dimension requested as `index` is never collapsed:
    with ``index="polygon"`` and a single polygon, the result is a dataframe with a single row.

    :param timeseries: dictionary as returned by `aggregate_spatial`
    :param index: which dimension should be used for the DataFrame index: 'date' or 'polygon'
    :param auto_collapse: whether single band or single polygon cases should be simplified automatically

    :return: pandas DataFrame or Series
    :raises InvalidTimeSeriesException: when the data is empty or has inconsistent polygon/band counts
    :raises ValueError: when `index` is not 'date' or 'polygon'
    """
    # The input timeseries dictionary is assumed to have this structure:
    # {dict mapping date -> [list with one item per polygon: [list with one float/None per band or empty list]]}
    # TODO is this format of `aggregate_spatial` standardized across backends? Or can we detect the structure?
    # TODO: option to pass a path to a JSON file as input?

    # Some quick checks
    if not timeseries:
        raise InvalidTimeSeriesException("Empty data set")
    polygon_counts = {len(polygon_data) for polygon_data in timeseries.values()}
    if polygon_counts == {0}:
        raise InvalidTimeSeriesException("No polygon data for each date")
    elif 0 in polygon_counts:
        # TODO: still support this use case?
        raise InvalidTimeSeriesException("No polygon data for some dates ({p})".format(p=polygon_counts))
    elif len(polygon_counts) > 1:
        raise InvalidTimeSeriesException("Inconsistent polygon counts: {p}".format(p=polygon_counts))
    # Count the number of bands in the timeseries, so we can provide a fallback array for missing data
    band_counts = {len(band_data) for polygon_data in timeseries.values() for band_data in polygon_data}
    if band_counts == {0}:
        raise InvalidTimeSeriesException("Zero bands everywhere")
    # Empty band lists are allowed for individual (date, polygon) combinations: handled with NaN fallback below.
    band_counts.discard(0)
    if len(band_counts) != 1:
        raise InvalidTimeSeriesException("Inconsistent band counts: {b}".format(b=band_counts))
    band_count = band_counts.pop()
    band_data_fallback = [np.nan] * band_count
    # Load the timeseries data in a pandas Series with multi-index ["date", "polygon", "band"]
    s = pandas.DataFrame.from_records(
        (
            (date, polygon_index, band_index, value)
            for (date, polygon_data) in timeseries.items()
            for polygon_index, band_data in enumerate(polygon_data)
            for band_index, value in enumerate(band_data or band_data_fallback)
        ),
        columns=["date", "polygon", "band", "value"],
        index=["date", "polygon", "band"],
    )["value"].rename(None)
    # TODO convert date to real date index?

    if auto_collapse:
        if s.index.levshape[2] == 1:
            # Single band case
            s.index = s.index.droplevel("band")
        # Note: "polygon" is still the second level here, whether or not "band" was dropped.
        if s.index.levshape[1] == 1 and index != "polygon":
            # Single polygon case (but keep the level when it is requested as index,
            # otherwise `unstack("polygon")` below would fail on the missing level).
            s.index = s.index.droplevel("polygon")

    # Reshape as desired
    if index == "date":
        if len(s.index.names) > 1:
            return s.unstack("date").T
        else:
            return s
    elif index == "polygon":
        return s.unstack("polygon").T
    else:
        raise ValueError(index)
data:: THIS + + Symbolic reference to the current data cube, to be used as argument in :py:meth:`DataCube.process()` calls + +""" +from __future__ import annotations + +import datetime +import logging +import pathlib +import re +import typing +import urllib.parse +import warnings +from builtins import staticmethod +from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union + +import numpy as np +import requests +import shapely.geometry +import shapely.geometry.base +from shapely.geometry import MultiPolygon, Polygon, mapping + +from openeo.api.process import Parameter, schema_supports +from openeo.dates import get_temporal_extent +from openeo.internal.documentation import openeo_process +from openeo.internal.graph_building import PGNode, ReduceNode, _FromNodeMixin +from openeo.internal.jupyter import in_jupyter_context +from openeo.internal.processes.builder import ( + ProcessBuilderBase, + convert_callable_to_pgnode, + get_parameter_names, +) +from openeo.internal.warnings import UserDeprecationWarning, deprecated, legacy_alias +from openeo.metadata import ( + Band, + BandDimension, + CollectionMetadata, + SpatialDimension, + TemporalDimension, + metadata_from_stac, +) +from openeo.processes import ProcessBuilder +from openeo.rest import BandMathException, OpenEoClientException, OperatorException +from openeo.rest._datacube import ( + THIS, + UDF, + _ensure_save_result, + _ProcessGraphAbstraction, + build_child_callback, +) +from openeo.rest.graph_building import CollectionProperty +from openeo.rest.job import BatchJob, RESTJob +from openeo.rest.mlmodel import MlModel +from openeo.rest.service import Service +from openeo.rest.udp import RESTUserDefinedProcess +from openeo.rest.vectorcube import VectorCube +from openeo.util import dict_no_none, guess_format, load_json, normalize_crs, rfc3339 + +if typing.TYPE_CHECKING: + # Imports for type checking only (circular import issue at runtime). 
def __init__(
    self, graph: PGNode, connection: Optional[Connection] = None, metadata: Optional[CollectionMetadata] = None
):
    """
    Create a data cube from a process graph node.

    :param graph: process graph node of this data cube
        (passed to the parent constructor as ``pgnode``)
    :param connection: optional connection object (passed on to the parent constructor)
    :param metadata: optional collection metadata, stored as-is in the ``metadata`` attribute
    """
    super().__init__(pgnode=graph, connection=connection)
    self.metadata: Optional[CollectionMetadata] = metadata
@classmethod
@openeo_process
def load_collection(
    cls,
    collection_id: Union[str, Parameter],
    connection: Optional[Connection] = None,
    spatial_extent: Union[Dict[str, float], Parameter, None] = None,
    temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
    bands: Union[None, List[str], Parameter] = None,
    fetch_metadata: bool = True,
    properties: Union[
        None, Dict[str, Union[str, PGNode, typing.Callable]], List[CollectionProperty], CollectionProperty
    ] = None,
    max_cloud_cover: Optional[float] = None,
) -> DataCube:
    """
    Create a new Raster Data cube.

    :param collection_id: image collection identifier
    :param connection: The backend connection to use.
        Can be ``None`` to work without connection and collection metadata.
    :param spatial_extent: limit data to specified bounding box or polygons
    :param temporal_extent: limit data to specified temporal interval.
        Typically, just a two-item list or tuple containing start and end date.
        See :ref:`filtering-on-temporal-extent-section` for more details on temporal extent handling and shorthand notation.
    :param bands: only add the specified bands.
    :param fetch_metadata: whether to fetch the collection metadata through the connection.
        Metadata is never fetched when there is no connection or when ``collection_id`` is a parameter.
    :param properties: limit data by metadata property predicates.
        See :py:func:`~openeo.rest.graph_building.collection_property` for easy construction of such predicates.
        A given dictionary is not modified.
    :param max_cloud_cover: shortcut to set maximum cloud cover ("eo:cloud_cover" collection property).
        Use ``None`` (default) to not filter on cloud cover; ``0`` is a valid threshold.
    :return: new DataCube containing the collection

    .. versionchanged:: 0.13.0
        added the ``max_cloud_cover`` argument.

    .. versionchanged:: 0.23.0
        Argument ``temporal_extent``: add support for year/month shorthand notation
        as discussed at :ref:`date-shorthand-handling`.

    .. versionchanged:: 0.26.0
        Add :py:func:`~openeo.rest.graph_building.collection_property` support to ``properties`` argument.
    """
    if temporal_extent:
        temporal_extent = cls._get_temporal_extent(extent=temporal_extent)

    if isinstance(spatial_extent, Parameter):
        if not schema_supports(spatial_extent.schema, type="object"):
            warnings.warn(
                "Unexpected parameterized `spatial_extent` in `load_collection`:"
                f" expected schema compatible with type 'object' but got {spatial_extent.schema!r}."
            )
    arguments = {
        'id': collection_id,
        # TODO: spatial_extent could also be a "geojson" subtype object, so we might want to allow (and convert) shapely shapes as well here.
        'spatial_extent': spatial_extent,
        'temporal_extent': temporal_extent,
    }
    if isinstance(collection_id, Parameter):
        # Can't look up metadata for a collection id that is only known at execution time.
        fetch_metadata = False
    metadata: Optional[CollectionMetadata] = (
        connection.collection_metadata(collection_id) if connection and fetch_metadata else None
    )
    if bands:
        if isinstance(bands, str):
            bands = [bands]
        elif isinstance(bands, Parameter):
            # Band selection is only known at execution time: metadata can not be trusted anymore.
            metadata = None
        if metadata:
            # Normalize non-string band references to band names, based on the metadata.
            bands = [b if isinstance(b, str) else metadata.band_dimension.band_name(b) for b in bands]
            # TODO: also apply spatial/temporal filters to metadata?
            metadata = metadata.filter_bands(bands)
        arguments['bands'] = bands

    # Normalize `properties` to a fresh dictionary (mapping property name to predicate)
    if isinstance(properties, list):
        # TODO: warn about items that are not CollectionProperty objects instead of silently dropping them.
        properties = {p.name: p.from_node() for p in properties if isinstance(p, CollectionProperty)}
    elif isinstance(properties, CollectionProperty):
        properties = {properties.name: properties.from_node()}
    elif properties is None:
        properties = {}
    elif isinstance(properties, dict):
        # Work on a copy, to avoid modifying the dictionary of the caller (e.g. with `max_cloud_cover` below).
        properties = dict(properties)
    if max_cloud_cover is not None:
        # Explicit `None` check: a threshold of 0 is a valid filter and should not be ignored.
        properties["eo:cloud_cover"] = lambda v: v <= max_cloud_cover
    if properties:
        summaries = metadata and metadata.get("summaries") or {}
        undefined_properties = set(properties.keys()).difference(summaries.keys())
        if undefined_properties:
            warnings.warn(
                f"{collection_id} property filtering with properties that are undefined "
                f"in the collection metadata (summaries): {', '.join(sorted(undefined_properties))}.",
                stacklevel=2,
            )
        arguments["properties"] = {
            prop: build_child_callback(pred, parent_parameters=["value"]) for prop, pred in properties.items()
        }

    pg = PGNode(
        process_id='load_collection',
        arguments=arguments
    )
    return cls(graph=pg, connection=connection, metadata=metadata)
@classmethod
def load_stac(
    cls,
    url: str,
    spatial_extent: Union[Dict[str, float], Parameter, None] = None,
    temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
    bands: Optional[List[str]] = None,
    properties: Optional[Dict[str, Union[str, PGNode, Callable]]] = None,
    connection: Optional[Connection] = None,
) -> DataCube:
    """
    Load data from a static STAC catalog or a STAC API Collection as a processable :py:class:`DataCube`.
    A batch job result can be loaded by providing a reference to it.

    If supported by the underlying metadata and file format, the data that is added to the data cube
    can be restricted with the parameters ``spatial_extent``, ``temporal_extent`` and ``bands``.
    If no data is available for the given extents, a ``NoDataAvailable`` error is thrown.

    Remarks:

    *   The bands (and all dimensions that specify nominal dimension labels) are expected to be ordered
        as specified in the metadata if the ``bands`` parameter is set to ``null``.
    *   If no additional parameter is specified this would imply that the whole data set is expected to be loaded.
        Due to the large size of many data sets, this is not recommended and may be optimized by back-ends
        to only load the data that is actually required after evaluating subsequent processes such as filters.
        This means that the values should be processed only after the data has been limited
        to the required extent and as a consequence also to a manageable size.

    :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog)
        or a specific STAC API Collection that allows to filter items and to download assets.
        This includes batch job results, which themselves are compliant to STAC.
        For external URLs, authentication details such as API keys or tokens may need to be included in the URL.

        Batch job results can be specified in two ways:

        - For batch job results at the same back-end, a URL pointing to the corresponding batch job results
          endpoint should be provided. The URL usually ends with ``/jobs/{id}/results`` and ``{id}``
          is the corresponding batch job ID.
        - For external results, a signed URL must be provided. Not all back-ends support signed URLs,
          which are provided as a link with the link relation `canonical` in the batch job result metadata.

    :param spatial_extent:
        Limits the data to load to the specified bounding box or polygons.

        For raster data, the process loads the pixel into the data cube if the point at the pixel center intersects
        with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).

        For vector data, the process loads the geometry into the data cube if the geometry is fully within the
        bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).
        Empty geometries may only be in the data cube if no spatial extent has been provided.

        The GeoJSON can be one of the following feature types:

        * A ``Polygon`` or ``MultiPolygon`` geometry,
        * a ``Feature`` with a ``Polygon`` or ``MultiPolygon`` geometry, or
        * a ``FeatureCollection`` containing at least one ``Feature`` with ``Polygon`` or ``MultiPolygon`` geometries.

        Set this parameter to ``None`` to set no limit for the spatial extent.
        Be careful with this when loading large datasets. It is recommended to use this parameter instead of
        using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data.

    :param temporal_extent:
        Limits the data to load to the specified left-closed temporal interval.
        Applies to all temporal dimensions.
        The interval has to be specified as an array with exactly two elements:

        1.  The first element is the start of the temporal interval.
            The specified instance in time is **included** in the interval.
        2.  The second element is the end of the temporal interval.
            The specified instance in time is **excluded** from the interval.

        The second element must always be greater/later than the first element.
        Otherwise, a `TemporalExtentEmpty` exception is thrown.

        Also supports open intervals by setting one of the boundaries to ``None``, but never both.

        Set this parameter to ``None`` to set no limit for the temporal extent.
        Be careful with this when loading large datasets. It is recommended to use this parameter instead of
        using ``filter_temporal()`` directly after loading unbounded data.

    :param bands:
        Only adds the specified bands into the data cube so that bands that don't match the list
        of band names are not available. Applies to all dimensions of type `bands`.

        Either the unique band name (metadata field ``name`` in bands) or one of the common band names
        (metadata field ``common_name`` in bands) can be specified.
        If the unique band name and the common name conflict, the unique band name has a higher priority.

        The order of the specified array defines the order of the bands in the data cube.
        If multiple bands match a common name, all matched bands are included in the original order.

        It is recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded data.

    :param properties:
        Limits the data by metadata properties to include only data in the data cube which
        all given conditions return ``True`` for (AND operation).

        Specify key-value-pairs with the key being the name of the metadata property,
        which can be retrieved with the openEO Data Discovery for Collections.
        The value must be a condition (user-defined process) to be evaluated against a STAC API.
        This parameter is not supported for static STAC.

    :param connection: The connection to use to connect with the backend.
    :return: new DataCube for the ``load_stac`` process.
        Its metadata is extracted from the STAC resource on a best-effort basis:
        it is ``None`` when that extraction fails.

    .. versionadded:: 0.33.0

    """
    # Only add the optional process arguments that are actually set.
    process_arguments = {"url": url}
    # TODO #425 more normalization/validation of extent/band parameters
    if spatial_extent:
        process_arguments.update(spatial_extent=spatial_extent)
    if temporal_extent:
        process_arguments.update(temporal_extent=DataCube._get_temporal_extent(extent=temporal_extent))
    if bands:
        process_arguments.update(bands=bands)
    if properties:
        predicates = {}
        for property_name, condition in properties.items():
            predicates[property_name] = build_child_callback(condition, parent_parameters=["value"])
        process_arguments.update(properties=predicates)
    node = PGNode("load_stac", arguments=process_arguments)

    # Metadata extraction is best effort: on any failure, just continue without metadata.
    try:
        cube_metadata = metadata_from_stac(url)
        if bands:
            # TODO: also apply spatial/temporal filters to metadata?
            cube_metadata = cube_metadata.filter_bands(band_names=bands)
    except Exception:
        log.warning(f"Failed to extract cube metadata from STAC URL {url}", exc_info=True)
        cube_metadata = None
    return cls(graph=node, connection=connection, metadata=cube_metadata)
+ if isinstance(d, Parameter) or isinstance(d, PGNode): + # TODO: warn about unexpected parameter schema + return d + elif isinstance(d, ProcessBuilderBase): + return d.pgnode + else: + return rfc3339.normalize(d) + + return list( + get_temporal_extent(*args, start_date=start_date, end_date=end_date, extent=extent, convertor=convertor) + ) + + @openeo_process + def filter_temporal( + self, + *args, + start_date: InputDate = None, + end_date: InputDate = None, + extent: Union[Sequence[InputDate], Parameter, str, None] = None, + ) -> DataCube: + """ + Limit the DataCube to a certain date range, which can be specified in several ways: + + >>> cube.filter_temporal("2019-07-01", "2019-08-01") + >>> cube.filter_temporal(["2019-07-01", "2019-08-01"]) + >>> cube.filter_temporal(extent=["2019-07-01", "2019-08-01"]) + >>> cube.filter_temporal(start_date="2019-07-01", end_date="2019-08-01"]) + + See :ref:`filtering-on-temporal-extent-section` for more details on temporal extent handling and shorthand notation. + + :param start_date: start date of the filter (inclusive), as a string or date object + :param end_date: end date of the filter (exclusive), as a string or date object + :param extent: temporal extent. + Typically, specified as a two-item list or tuple containing start and end date. + + .. versionchanged:: 0.23.0 + Arguments ``start_date``, ``end_date`` and ``extent``: + add support for year/month shorthand notation as discussed at :ref:`date-shorthand-handling`. + """ + if len(args) == 1 and isinstance(args[0], (str)): + raise OpenEoClientException( + f"filter_temporal() with a single string argument ({args[0]!r}) is ambiguous." + f" If you want a half-unbounded interval, use something like filter_temporal({args[0]!r}, None) or use explicit keyword arguments." + f" If you want the full interval covering all of {args[0]!r}, use something like filter_temporal(extent={args[0]!r})." 
@openeo_process
def filter_bbox(
    self,
    *args,
    west: Optional[float] = None,
    south: Optional[float] = None,
    east: Optional[float] = None,
    north: Optional[float] = None,
    crs: Optional[Union[int, str]] = None,
    base: Optional[float] = None,
    height: Optional[float] = None,
    bbox: Union[Sequence[float], Parameter, None] = None,
) -> DataCube:
    """
    Limits the data cube to the specified bounding box.

    The bounding box can be specified in multiple ways.

    - With keyword arguments::

        >>> cube.filter_bbox(west=3, south=51, east=4, north=52, crs=4326)

    - With a (west, south, east, north) list or tuple
      (note that EPSG:4326 is the default CRS, so it's not necessary to specify it explicitly)::

        >>> cube.filter_bbox([3, 51, 4, 52])
        >>> cube.filter_bbox(bbox=[3, 51, 4, 52])

    - With a bbox dictionary::

        >>> bbox = {"west": 3, "south": 51, "east": 4, "north": 52, "crs": 4326}
        >>> cube.filter_bbox(bbox)
        >>> cube.filter_bbox(bbox=bbox)
        >>> cube.filter_bbox(**bbox)

    - With a shapely geometry (of which the bounding box will be used)::

        >>> cube.filter_bbox(geometry)
        >>> cube.filter_bbox(bbox=geometry)

    - Passing a parameter::

        >>> bbox_param = Parameter(name="my_bbox", schema="object")
        >>> cube.filter_bbox(bbox_param)
        >>> cube.filter_bbox(bbox=bbox_param)

    - With a CRS other than EPSG 4326::

        >>> cube.filter_bbox(
        ...     west=652000, east=672000, north=5161000, south=5181000,
        ...     crs=32632
        ... )

    - Deprecated: positional arguments are also supported,
      but follow a non-standard order for legacy reasons::

        >>> west, east, north, south = 3, 4, 52, 51
        >>> cube.filter_bbox(west, east, north, south)

    :param west: west bound of the box (stored as ``extent["west"]``)
    :param south: south bound of the box (stored as ``extent["south"]``)
    :param east: east bound of the box (stored as ``extent["east"]``)
    :param north: north bound of the box (stored as ``extent["north"]``)
    :param crs: value describing the coordinate reference system.
        Typically just an int (interpreted as EPSG code, e.g. ``4326``)
        or a string (handled as authority string, e.g. ``"EPSG:4326"``).
        See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument.
    :param base: optional ``base`` field, only added to the extent when not ``None``
    :param height: optional ``height`` field, only added to the extent when not ``None``
    :param bbox: bounding box as 4-item list/tuple, dictionary, shapely geometry
        or :py:class:`Parameter`
    :return: data cube with a ``filter_bbox`` process appended
    :raises ValueError: when argument styles are mixed,
        or when the positional arguments or ``bbox`` have an unsupported shape or type
    """
    if args and any(k is not None for k in (west, south, east, north, bbox)):
        raise ValueError("Don't mix positional arguments with keyword arguments.")
    # Compare against None (not truthiness): an empty list/dict/geometry is still
    # an explicitly given `bbox` and has to go through validation below.
    if bbox is not None and any(k is not None for k in (west, south, east, north)):
        raise ValueError("Don't mix `bbox` with `west`/`south`/`east`/`north` keyword arguments.")

    if args:
        if 4 <= len(args) <= 5:
            # Handle old-style west-east-north-south order
            # TODO remove handling of this legacy order?
            warnings.warn("Deprecated argument order usage: `filter_bbox(west, east, north, south)`."
                          " Use keyword arguments or tuple/list argument instead.")
            west, east, north, south = args[:4]
            if len(args) > 4:
                crs = normalize_crs(args[4])
        elif len(args) == 1 and (
            (isinstance(args[0], (list, tuple)) and len(args[0]) == 4)
            or isinstance(args[0], (dict, shapely.geometry.base.BaseGeometry, Parameter))
        ):
            bbox = args[0]
        else:
            raise ValueError(args)

    if isinstance(bbox, Parameter):
        if not schema_supports(bbox.schema, type="object"):
            warnings.warn(
                "Unexpected parameterized `extent` in `filter_bbox`:"
                f" expected schema compatible with type 'object' but got {bbox.schema!r}."
            )
        extent = bbox
    else:
        if bbox is not None:
            if isinstance(bbox, shapely.geometry.base.BaseGeometry):
                west, south, east, north = bbox.bounds
            elif isinstance(bbox, (list, tuple)) and len(bbox) == 4:
                west, south, east, north = bbox[:4]
            elif isinstance(bbox, dict):
                west, south, east, north = (bbox[k] for k in ["west", "south", "east", "north"])
                if "crs" in bbox:
                    crs = bbox["crs"]
            else:
                raise ValueError(bbox)

        extent = {'west': west, 'east': east, 'north': north, 'south': south}
        extent.update(dict_no_none(crs=crs, base=base, height=height))

    return self.process(
        process_id='filter_bbox',
        arguments={
            'data': THIS,
            'extent': extent
        }
    )
@openeo_process
def filter_bands(self, bands: Union[List[Union[str, int]], str]) -> DataCube:
    """
    Restrict the data cube to the given bands.

    :param bands: list of band names, common names or band indices.
        A single band name can also be given as a plain string.
    :return: a DataCube instance
    """
    # Accept a single band name as shorthand for a one-item list.
    selected = [bands] if isinstance(bands, str) else bands

    if self._do_metadata_normalization():
        # Resolve each entry through the band dimension of the cube metadata.
        band_dimension = self.metadata.band_dimension
        selected = [band_dimension.band_name(b) for b in selected]

    if self.metadata:
        filtered_metadata = self.metadata.filter_bands(selected)
    else:
        filtered_metadata = None

    return self.process(
        process_id="filter_bands",
        arguments={"data": THIS, "bands": selected},
        metadata=filtered_metadata,
    )
@openeo_process
def resample_spatial(
    self, resolution: Union[float, Tuple[float, float]], projection: Optional[Union[int, str]] = None,
    method: str = 'near', align: str = 'upper-left'
) -> DataCube:
    """
    Append a ``resample_spatial`` process to this data cube.

    All arguments are forwarded unchanged as the equally named arguments
    of the ``resample_spatial`` process (``projection`` is also sent when it is ``None``).

    :param resolution: value for the ``resolution`` argument: a single number or a pair of numbers
    :param projection: value for the ``projection`` argument (int or string), ``None`` by default
    :param method: value for the ``method`` argument, ``'near'`` by default
    :param align: value for the ``align`` argument, ``'upper-left'`` by default
    :return: a DataCube instance with the ``resample_spatial`` process appended
    """
    return self.process('resample_spatial', {
        'data': THIS,
        'resolution': resolution,
        'projection': projection,
        'method': method,
        'align': align
    })
def _apply_operator(
    self,
    operator: str,
    other: Optional[Union[int, float]] = None,
    reverse: Optional[bool] = None,
    extra_arguments: Optional[dict] = None,
) -> DataCube:
    """
    Apply a unary or binary operator/process through an `apply` node:
    either by extending the callback of the current `apply` result node,
    or by starting a fresh `apply` on this cube.

    :param operator: process id of operator
    :param other: for binary operators: "other" argument
    :param reverse: for binary operators: "self" and "other" should be swapped (reflected operator mode)
    :param extra_arguments: additional arguments to include in the operator's process node
    :return: data cube with the (new or extended) `apply` node as result
    """
    current = self.result_node()
    if current.process_id == "apply":
        # Chain onto the callback of the existing `apply` node.
        data = current.arguments["data"]
        x_value = {"from_node": current.arguments["process"]["process_graph"]}
        context = current.arguments.get("context")
    else:
        # No `apply` node yet: operate directly on the callback's "x" parameter.
        data = self
        x_value = {"from_parameter": "x"}
        context = None

    # Arguments of the operator node inside the child callback.
    callback_args = {"x": x_value}
    callback_args.update(extra_arguments or {})
    if other is not None:
        # Binary operator mode
        if reverse:
            callback_args["y"] = callback_args["x"]
            callback_args["x"] = other
        else:
            callback_args["y"] = other

    callback = PGNode(process_id=operator, arguments=callback_args)
    apply_node = PGNode(
        process_id="apply",
        arguments=dict_no_none(
            data=data,
            process={"process_graph": callback},
            context=context,
        ),
    )
    return self.process_with_node(apply_node)
@openeo_process(process_id="gte", mode="operator")
def __ge__(self, other: Union[DataCube, int, float]) -> DataCube:
    """
    Comparison ``this >= other`` of this data cube with another cube or constant,
    built with the ``gte`` process.

    :param other: another data cube, or a constant
    :return: this >= other
    """
    # The associated process id must match the operator that is actually built ("gte").
    return self._operator_binary("gte", other)
@openeo_process(process_id="lte", mode="operator")
def __le__(self, other: Union[DataCube, int, float]) -> DataCube:
    """
    Comparison ``this <= other`` of this data cube with another cube or constant,
    built with the ``lte`` process.

    :param other: another data cube, or a constant
    :return: this <= other
    """
    # The associated process id must match the operator that is actually built ("lte").
    return self._operator_binary("lte", other)
def _bandmath_operator_binary_cubes(
    self, operator, other: DataCube, left_arg_name="x", right_arg_name="y"
) -> DataCube:
    """
    Band math binary operator with a data cube as right hand side argument.

    Both cubes must be in band math mode and their band math nodes must share
    the same ``data`` argument.

    :param operator: process id of the binary operator
    :param other: right hand side data cube
    :param left_arg_name: argument name under which this cube's reducer is passed
    :param right_arg_name: argument name under which the other cube's reducer is passed
    :return: data cube whose band math node has the combined reducer
    :raises BandMathException: when both band math nodes have different ``data`` arguments
    """
    lhs_node = self._get_bandmath_node()
    rhs_node = other._get_bandmath_node()
    if lhs_node.arguments["data"] != rhs_node.arguments["data"]:
        raise BandMathException("'Band math' between bands of different data cubes is not supported yet.")

    # Reducer sub-process graph: the operator applied on both existing reducers.
    operands = {}
    operands[left_arg_name] = {"from_node": lhs_node.reducer_process_graph()}
    operands[right_arg_name] = {"from_node": rhs_node.reducer_process_graph()}
    combined = PGNode(process_id=operator, arguments=operands)

    return self.process_with_node(lhs_node.clone_with_new_reducer(combined))
def _get_geometry_argument(
    self,
    argument: Union[
        shapely.geometry.base.BaseGeometry,
        dict,
        str,
        pathlib.Path,
        Parameter,
        _FromNodeMixin,
    ],
    valid_geojson_types: List[str],
    crs: Optional[str] = None,
) -> Union[dict, Parameter, PGNode]:
    """
    Convert input to a geometry as "geojson" subtype object or vectorcube.

    Handling per input type:

    - :py:class:`Parameter`: returned as-is.
    - ``_FromNodeMixin`` instance: its ``from_node()`` result is returned.
    - ``http(s)://`` URL string: loaded through ``connection.load_url``,
      with a best-effort format guess based on the URL path suffix.
    - path (string or :py:class:`~pathlib.Path`) to an existing ``.json``/``.geojson`` file:
      loaded client-side with ``load_json``.
    - shapely geometry: converted with ``mapping``.
    - dictionary: used as GeoJSON construct.

    :param argument: geometry specification (see above)
    :param valid_geojson_types: allowed values for the GeoJSON ``type`` field
    :param crs: value that encodes a coordinate reference system.
        See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument.
    :return: GeoJSON-style dictionary, or the parameter/node/``load_url`` result for the pass-through cases.
        A dictionary given by the caller is never modified: a ``crs`` field is added on a copy.
    :raises OpenEoClientException: on unsupported argument or GeoJSON type that is not allowed
    """
    if isinstance(argument, Parameter):
        return argument
    elif isinstance(argument, _FromNodeMixin):
        return argument.from_node()

    if isinstance(argument, str) and re.match(r"^https?://", argument, flags=re.I):
        # Geometry provided as URL: load with `load_url` (with best-effort format guess)
        url = urllib.parse.urlparse(argument)
        suffix = pathlib.Path(url.path.lower()).suffix
        # Fall back on the bare suffix (without dot) for unknown extensions.
        url_format = {
            ".json": "GeoJSON",
            ".geojson": "GeoJSON",
            ".pq": "Parquet",
            ".parquet": "Parquet",
            ".geoparquet": "Parquet",
        }.get(suffix, suffix.split(".")[-1])
        return self.connection.load_url(url=argument, format=url_format)

    if (
        isinstance(argument, (str, pathlib.Path))
        and pathlib.Path(argument).is_file()
        and pathlib.Path(argument).suffix.lower() in [".json", ".geojson"]
    ):
        geometry = load_json(argument)
    elif isinstance(argument, shapely.geometry.base.BaseGeometry):
        geometry = mapping(argument)
    elif isinstance(argument, dict):
        geometry = argument
    else:
        raise OpenEoClientException(f"Invalid geometry argument: {argument!r}")

    geometry_type = geometry.get("type")
    if geometry_type not in valid_geojson_types:
        raise OpenEoClientException(
            f"Invalid geometry type {geometry_type!r}, must be one of {valid_geojson_types}"
        )
    if crs:
        # TODO: don't warn when the crs is Lon-Lat like EPSG:4326?
        warnings.warn(f"Geometry with non-Lon-Lat CRS {crs!r} is only supported by specific back-ends.")
        # TODO #204 alternative for non-standard CRS in GeoJSON object?
        epsg_code = normalize_crs(crs)
        if epsg_code is not None:
            # proj did recognize the CRS
            crs_name = f"EPSG:{epsg_code}"
        else:
            # proj did not recognise this CRS
            warnings.warn(f"non-Lon-Lat CRS {crs!r} is not known to the proj library and might not be supported.")
            crs_name = crs
        # Attach the CRS on a shallow copy, to avoid modifying a dictionary owned by the caller.
        geometry = dict(geometry, crs={"type": "name", "properties": {"name": crs_name}})
    return geometry
+ For example: + + - ``"mean"`` (string) + - :py:func:`absolute ` (:ref:`predefined openEO process function `) + - ``lambda data: data.min()`` (function or lambda) + + :param target_dimension: The new dimension name to be used for storing the results. + :param crs: The spatial reference system of the provided polygon. + By default, longitude-latitude (EPSG:4326) is assumed. + See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument. + + .. note:: this ``crs`` argument is a non-standard/experimental feature, only supported by specific back-ends. + See https://github.com/Open-EO/openeo-processes/issues/235 for details. + + :param context: Additional data to be passed to the reducer process. + + .. versionchanged:: 0.36.0 + Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process. + + .. versionchanged:: 0.36.0 + Support for passing a backend-side path as ``geometries`` argument was removed + (also see :ref:`legacy_read_vector`). + Instead, it's possible to provide a client-side path to a GeoJSON file + (which will be loaded client-side to get the geometries as GeoJSON construct). 
+ """ + valid_geojson_types = [ + "Point", "MultiPoint", "LineString", "MultiLineString", + "Polygon", "MultiPolygon", "GeometryCollection", "Feature", "FeatureCollection" + ] + geometries = self._get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types, crs=crs) + reducer = build_child_callback(reducer, parent_parameters=["data"]) + return VectorCube( + graph=self._build_pgnode( + process_id="aggregate_spatial", + data=THIS, + geometries=geometries, + reducer=reducer, + arguments=dict_no_none( + target_dimension=target_dimension, context=context + ), + ), + connection=self._connection, + # TODO: also add new "geometry" dimension #457 + metadata=None if self.metadata is None else self.metadata.reduce_spatial(), + ) + + @openeo_process + def aggregate_spatial_window( + self, + reducer: Union[str, typing.Callable, PGNode], + size: List[int], + boundary: str = "pad", + align: str = "upper-left", + context: Optional[dict] = None, + # TODO arguments: target dimension, context + ) -> DataCube: + """ + Aggregates statistics over the horizontal spatial dimensions (axes x and y) of the data cube. + + The pixel grid for the axes x and y is divided into non-overlapping windows with the size + specified in the parameter size. If the number of values for the axes x and y is not a multiple + of the corresponding window size, the behavior specified in the parameters boundary and align + is applied. For each of these windows, the reducer process computes the result. + + :param reducer: the "child callback": + the name of a single openEO process, + or a callback function as discussed in :ref:`callbackfunctions`, + or a :py:class:`UDF ` instance. + :param size: Window size in pixels along the horizontal spatial dimensions. + The first value corresponds to the x axis, the second value corresponds to the y axis. + :param boundary: Behavior to apply if the number of values for the axes x and y is not a + multiple of the corresponding value in the size parameter. 
@openeo_process
def apply_dimension(
    self,
    code: Optional[str] = None,
    runtime=None,
    # TODO: drop None default of process (when `code` and `runtime` args can be dropped)
    process: Union[str, typing.Callable, UDF, PGNode] = None,
    version: Optional[str] = None,
    # TODO: dimension has no default (per spec)?
    dimension: str = "t",
    target_dimension: Optional[str] = None,
    context: Optional[dict] = None,
) -> DataCube:
    """
    Applies a process to all pixel values along a dimension of a raster data cube. For example,
    if the temporal dimension is specified the process will work on a time series of pixel values.

    The process to apply is specified by either `code` and `runtime` in case of a UDF, or by providing a callback function
    in the `process` argument.

    The process reduce_dimension also applies a process to pixel values along a dimension, but drops
    the dimension afterwards. The process apply applies a process to each pixel value in the data cube.

    The target dimension is the source dimension if not specified otherwise in the target_dimension parameter.
    The pixel values in the target dimension get replaced by the computed pixel values. The name, type and
    reference system are preserved.

    The dimension labels are preserved when the target dimension is the source dimension and the number of
    pixel values in the source dimension is equal to the number of values computed by the process. Otherwise,
    the dimension labels will be incrementing integers starting from zero, which can be changed using
    rename_labels afterwards. The number of labels will equal to the number of values computed by the process.

    :param code: [**deprecated**] UDF code or process identifier (optional)
    :param runtime: [**deprecated**] UDF runtime to use (optional)
    :param process: the "child callback":
        the name of a single process,
        or a callback function as discussed in :ref:`callbackfunctions`,
        or an ``openeo.UDF`` instance.

        The callback should correspond to a process that
        receives an array of numerical values
        and returns an array of numerical values.
        For example:

        - ``"sort"`` (string)
        - ``lambda data: data.concat([42, -3])`` (function or lambda)

    :param version: [**deprecated**] Version of the UDF runtime to use
    :param dimension: The name of the source dimension to apply the process on. Fails with a DimensionNotAvailable error if the specified dimension does not exist.
    :param target_dimension: The name of the target dimension or null (the default) to use the source dimension
        specified in the parameter dimension. By specifying a target dimension, the source dimension is removed.
        The target dimension with the specified name and the type other (see add_dimension) is created, if it doesn't exist yet.
    :param context: Additional data to be passed to the process.

    :return: A datacube with the UDF applied to the given dimension.
    :raises: DimensionNotAvailable
    :raises ValueError: when ``process`` is combined with the deprecated ``code``/``runtime``/``version`` arguments

    .. versionchanged:: 0.13.0
        arguments ``code``, ``runtime`` and ``version`` are deprecated in favor of the standard approach
        of using an ``openeo.UDF`` object in the ``process`` argument.
        See :ref:`old_udf_api` for more background about the changes.

    """
    # TODO #137 #181 #312 remove support for code/runtime/version
    if runtime or (isinstance(code, str) and "\n" in code) or version:
        if process:
            raise ValueError(
                "Cannot specify `process` argument together with deprecated `code`/`runtime`/`version` arguments."
            )
        else:
            warnings.warn(
                "Specifying UDF code through `code`, `runtime` and `version` arguments is deprecated. "
                "Instead create an `openeo.UDF` object and pass that to the `process` argument.",
                category=UserDeprecationWarning,
                stacklevel=2,
            )
            process = UDF(code=code, runtime=runtime, version=version, context=context)
    else:
        # Single-line `code` (without runtime/version) is handled as process identifier.
        process = process or code
    process = build_child_callback(
        process=process, parent_parameters=["data", "context"], connection=self.connection
    )
    arguments = {
        "data": THIS,
        "process": process,
        "dimension": self._assert_valid_dimension_name(dimension),
    }

    metadata = self.metadata
    if target_dimension is not None:
        arguments["target_dimension"] = target_dimension
        if self.metadata:
            # The source dimension is removed when a target dimension is specified ...
            metadata = self.metadata.reduce_dimension(dimension_name=dimension)
            if target_dimension not in self.metadata.dimension_names():
                # ... and the target dimension is added (on top of the reduced metadata) when it is new.
                metadata = metadata.add_dimension(target_dimension, label="unknown")
        else:
            # No metadata available: nothing to update (and nothing to inspect).
            metadata = None
    if context is not None:
        arguments["context"] = context
    result_cube = self.process(process_id="apply_dimension", arguments=arguments, metadata=metadata)

    return result_cube
+ """ + # TODO: check if dimension is valid according to metadata? #116 + # TODO: #125 use/test case for `reduce_dimension_binary`? + reducer = build_child_callback( + process=reducer, parent_parameters=["data", "context"], connection=self.connection + ) + + return self.process_with_node( + ReduceNode( + process_id=process_id, + data=self, + reducer=reducer, + dimension=self._assert_valid_dimension_name(dimension), + context=context, + # TODO #123 is it (still) necessary to make "band" math a special case? + band_math_mode=band_math_mode, + ), + metadata=self.metadata.reduce_dimension(dimension_name=dimension) if self.metadata else None, + ) + + @openeo_process + def reduce_spatial( + self, + reducer: Union[str, typing.Callable, UDF, PGNode], + context: Optional[dict] = None, + ) -> "DataCube": + """ + Add a reduce process with given reducer callback along the spatial dimensions + + :param reducer: the "child callback": + the name of a single openEO process, + or a callback function as discussed in :ref:`callbackfunctions`, + or a :py:class:`UDF ` instance. + + The callback should correspond to a process that + receives an array of numerical values + and returns a single numerical value. + For example: + + - ``"mean"`` (string) + - :py:func:`absolute ` (:ref:`predefined openEO process function `) + - ``lambda data: data.min()`` (function or lambda) + + :param context: Additional data to be passed to the process. 
+ """ + reducer = build_child_callback( + process=reducer, parent_parameters=["data", "context"], connection=self.connection + ) + return self.process( + process_id="reduce_spatial", + data=self, + reducer=reducer, + context=context, + metadata=self.metadata.reduce_spatial(), + ) + + @deprecated("Use :py:meth:`apply_polygon`.", version="0.26.0") + def chunk_polygon( + self, + chunks: Union[shapely.geometry.base.BaseGeometry, dict, str, pathlib.Path, Parameter, VectorCube], + process: Union[str, PGNode, typing.Callable, UDF], + mask_value: float = None, + context: Optional[dict] = None, + ) -> DataCube: + """""" + process = build_child_callback(process, parent_parameters=["data"], connection=self.connection) + valid_geojson_types = [ + "Polygon", + "MultiPolygon", + "GeometryCollection", + "Feature", + "FeatureCollection", + ] + chunks = self._get_geometry_argument( + chunks, valid_geojson_types=valid_geojson_types + ) + mask_value = float(mask_value) if mask_value is not None else None + return self.process( + process_id="chunk_polygon", + data=THIS, + chunks=chunks, + process=process, + arguments=dict_no_none( + mask_value=mask_value, + context=context, + ), + ) + + @openeo_process + def apply_polygon( + self, + geometries: Union[shapely.geometry.base.BaseGeometry, dict, str, pathlib.Path, Parameter, VectorCube] = None, + process: Union[str, PGNode, typing.Callable, UDF] = None, + mask_value: Optional[float] = None, + context: Optional[dict] = None, + **kwargs, + ) -> DataCube: + """ + Apply a process to segments of the data cube that are defined by the given polygons. + For each polygon provided, all pixels for which the point at the pixel center intersects + with the polygon (as defined in the Simple Features standard by the OGC) are collected into sub data cubes. + If a pixel is part of multiple of the provided polygons (e.g., when the polygons overlap), + the GeometriesOverlap exception is thrown. 
+ Each sub data cube is passed individually to the given process. + + :param geometries: Can be provided in different ways: + + - a shapely geometry + - a GeoJSON-style dictionary, + - a public URL to the geometries in a vector format that is supported by the backend + (also see :py:func:`Connection.list_file_formats() `), + e.g. GeoJSON, GeoParquet, etc. + A ``load_url`` process will automatically be added to the process graph. + - a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file, + which will be loaded automatically to get the geometries as GeoJSON construct. + - a :py:class:`~openeo.rest.vectorcube.VectorCube` instance. + - a :py:class:`~openeo.api.process.Parameter` instance. + + :param process: "child callback" function, see :ref:`callbackfunctions` + :param mask_value: The value used for pixels outside the polygon. + :param context: Additional data to be passed to the process. + + .. warning:: experimental process: not generally supported, API subject to change. + + .. versionchanged:: 0.32.0 + Argument ``polygons`` was renamed to ``geometries``. + While deprecated, the old name ``polygons`` is still supported + as keyword argument for backwards compatibility. + + .. versionchanged:: 0.36.0 + Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process. + + .. versionchanged:: 0.36.0 + Support for passing a backend-side path as ``geometries`` argument was removed + (also see :ref:`legacy_read_vector`). + Instead, it's possible to provide a client-side path to a GeoJSON file + (which will be loaded client-side to get the geometries as GeoJSON construct). 
+ """ + # TODO drop support for legacy `polygons` argument: + # remove `kwargs, remove default `None` value for `geometries` and `process` + # and the related backwards compatibility code + geometries_parameter = "geometries" + if geometries is None and "polygons" in kwargs: + geometries = kwargs.pop("polygons") + geometries_parameter = "polygons" + warnings.warn( + "In `apply_polygon` use argument `geometries` instead of deprecated 'polygons'.", + category=UserDeprecationWarning, + stacklevel=2, + ) + if kwargs: + raise ValueError(f"Unexpected keyword arguments: {kwargs!r}") + if not geometries: + raise ValueError("No geometries provided.") + + # Note: the `process` argument was given a default value `None` (with the `polygons`/`geometries` argument rename) + # to keep support for legacy `cube.apply_polygon(polygons=..., process=...)` usage: + # `geometries` had to be given a default value, and so did `process` as it comes after it. + # TODO: remove default value for `process` when dropping support for legacy `polygons` argument + assert process is not None + + process = build_child_callback(process, parent_parameters=["data"], connection=self.connection) + valid_geojson_types = ["Polygon", "MultiPolygon", "Feature", "FeatureCollection"] + geometries = self._get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types) + mask_value = float(mask_value) if mask_value is not None else None + return self.process( + process_id="apply_polygon", + data=THIS, + **{geometries_parameter: geometries}, + process=process, + arguments=dict_no_none( + mask_value=mask_value, + context=context, + ), + ) + + def reduce_bands(self, reducer: Union[str, PGNode, typing.Callable, UDF]) -> DataCube: + """ + Shortcut for :py:meth:`reduce_dimension` along the band dimension + + :param reducer: "child callback" function, see :ref:`callbackfunctions` + """ + return self.reduce_dimension( + dimension=self.metadata.band_dimension.name if self.metadata else "bands", + 
reducer=reducer, + band_math_mode=True, + ) + + def reduce_temporal(self, reducer: Union[str, PGNode, typing.Callable, UDF]) -> DataCube: + """ + Shortcut for :py:meth:`reduce_dimension` along the temporal dimension + + :param reducer: "child callback" function, see :ref:`callbackfunctions` + """ + return self.reduce_dimension( + dimension=self.metadata.temporal_dimension.name if self.metadata else "t", + reducer=reducer, + ) + + @deprecated( + "Use :py:meth:`reduce_bands` with :py:class:`UDF ` as reducer.", + version="0.13.0", + ) + def reduce_bands_udf(self, code: str, runtime: Optional[str] = None, version: Optional[str] = None) -> DataCube: + """ + Use `reduce_dimension` process with given UDF along band/spectral dimension. + """ + # TODO #181 #312 drop this deprecated pattern + return self.reduce_bands(reducer=UDF(code=code, runtime=runtime, version=version)) + + @openeo_process + def add_dimension(self, name: str, label: str, type: Optional[str] = None): + """ + Adds a new named dimension to the data cube. + Afterwards, the dimension can be referenced with the specified name. If a dimension with the specified name exists, + the process fails with a DimensionExists error. The dimension label of the dimension is set to the specified label. + + This call does not modify the datacube in place, but returns a new datacube with the additional dimension. + + :param name: The name of the dimension to add + :param label: The dimension label. + :param type: Dimension type, allowed values: 'spatial', 'temporal', 'bands', 'other', default value is 'other' + :return: The data cube with a newly added dimension. The new dimension has exactly one dimension label. All other dimensions remain unchanged. 
+ """ + return self.process( + process_id="add_dimension", + arguments=dict_no_none({"data": self, "name": name, "label": label, "type": type}), + metadata=self.metadata.add_dimension(name=name, label=label, type=type) if self.metadata else None, + ) + + @openeo_process + def drop_dimension(self, name: str): + """ + Drops a dimension from the data cube. + Dropping a dimension only works on dimensions with a single dimension label left, otherwise the process fails + with a DimensionLabelCountMismatch exception. Dimension values can be reduced to a single value with a filter + such as filter_bands or the reduce_dimension process. If a dimension with the specified name does not exist, + the process fails with a DimensionNotAvailable exception. + + :param name: The name of the dimension to drop + :return: The data cube with the given dimension dropped. + """ + return self.process( + process_id="drop_dimension", + arguments={"data": self, "name": name}, + metadata=self.metadata.drop_dimension(name=name) if self.metadata else None, + ) + + @deprecated( + "Use :py:meth:`reduce_temporal` with :py:class:`UDF ` as reducer", + version="0.13.0", + ) + def reduce_temporal_udf(self, code: str, runtime="Python", version="latest"): + """ + Apply reduce (`reduce_dimension`) process with given UDF along temporal dimension. + + :param code: The UDF code, compatible with the given runtime and version + :param runtime: The UDF runtime + :param version: The UDF runtime version + """ + # TODO #181 #312 drop this deprecated pattern + return self.reduce_temporal(reducer=UDF(code=code, runtime=runtime, version=version)) + + reduce_tiles_over_time = legacy_alias( + reduce_temporal_udf, name="reduce_tiles_over_time", since="0.1.1" + ) + + @openeo_process + def apply_neighborhood( + self, + process: Union[str, PGNode, typing.Callable, UDF], + size: List[Dict], + overlap: List[dict] = None, + context: Optional[dict] = None, + ) -> DataCube: + """ + Applies a focal process to a data cube. 
+ + A focal process is a process that works on a 'neighbourhood' of pixels. The neighbourhood can extend into multiple dimensions, this extent is specified by the `size` argument. It is not only (part of) the size of the input window, but also the size of the output for a given position of the sliding window. The sliding window moves with multiples of `size`. + + An overlap can be specified so that neighbourhoods can have overlapping boundaries. This allows for continuity of the output. The values included in the data cube as overlap can't be modified by the given `process`. + + The neighbourhood size should be kept small enough, to avoid running beyond computational resources, but a too small size will result in a larger number of process invocations, which may slow down processing. Window sizes for spatial dimensions typically are in the range of 64 to 512 pixels, while overlaps of 8 to 32 pixels are common. + + The process must not add new dimensions, or remove entire dimensions, but the result can have different dimension labels. + + For the special case of 2D convolution, it is recommended to use ``apply_kernel()``. + + :param size: + :param overlap: + :param process: a callback function that creates a process graph, see :ref:`callbackfunctions` + :param context: Additional data to be passed to the process. + + :return: + """ + return self.process( + process_id="apply_neighborhood", + arguments=dict_no_none( + data=THIS, + process=build_child_callback(process=process, parent_parameters=["data"], connection=self.connection), + size=size, + overlap=overlap, + context=context, + ) + ) + + @openeo_process + def apply( + self, + process: Union[str, typing.Callable, UDF, PGNode], + context: Optional[dict] = None, + ) -> DataCube: + """ + Applies a unary process (a local operation) to each value of the specified or all dimensions in the data cube. 
+ + :param process: the "child callback": + the name of a single process, + or a callback function as discussed in :ref:`callbackfunctions`, + or a :py:class:`UDF ` instance. + + The callback should correspond to a process that + receives a single numerical value + and returns a single numerical value. + For example: + + - ``"absolute"`` (string) + - :py:func:`absolute ` (:ref:`predefined openEO process function `) + - ``lambda x: x * 2 + 3`` (function or lambda) + + :param context: Additional data to be passed to the process. + + :return: A data cube with the newly computed values. The resolution, cardinality and the number of dimensions are the same as for the original data cube. + """ + return self.process( + process_id="apply", + arguments=dict_no_none( + { + "data": THIS, + "process": build_child_callback(process, parent_parameters=["x"], connection=self.connection), + "context": context, + } + ), + ) + + reduce_temporal_simple = legacy_alias( + reduce_temporal, "reduce_temporal_simple", since="0.13.0" + ) + + @openeo_process(process_id="min", mode="reduce_dimension") + def min_time(self) -> DataCube: + """ + Finds the minimum value of a time series for all bands of the input dataset. + + :return: a DataCube instance + """ + return self.reduce_temporal("min") + + @openeo_process(process_id="max", mode="reduce_dimension") + def max_time(self) -> DataCube: + """ + Finds the maximum value of a time series for all bands of the input dataset. + + :return: a DataCube instance + """ + return self.reduce_temporal("max") + + @openeo_process(process_id="mean", mode="reduce_dimension") + def mean_time(self) -> DataCube: + """ + Finds the mean value of a time series for all bands of the input dataset. + + :return: a DataCube instance + """ + return self.reduce_temporal("mean") + + @openeo_process(process_id="median", mode="reduce_dimension") + def median_time(self) -> DataCube: + """ + Finds the median value of a time series for all bands of the input dataset. 
+ + :return: a DataCube instance + """ + return self.reduce_temporal("median") + + @openeo_process(process_id="count", mode="reduce_dimension") + def count_time(self) -> DataCube: + """ + Counts the number of images with a valid mask in a time series for all bands of the input dataset. + + :return: a DataCube instance + """ + return self.reduce_temporal("count") + + @openeo_process + def aggregate_temporal( + self, + intervals: List[list], + reducer: Union[str, typing.Callable, PGNode], + labels: Optional[List[str]] = None, + dimension: Optional[str] = None, + context: Optional[dict] = None, + ) -> DataCube: + """ + Computes a temporal aggregation based on an array of date and/or time intervals. + + Calendar hierarchies such as year, month, week etc. must be transformed into specific intervals by the clients. For each interval, all data along the dimension will be passed through the reducer. The computed values will be projected to the labels, so the number of labels and the number of intervals need to be equal. + + If the dimension is not set, the data cube is expected to only have one temporal dimension. + + :param intervals: Temporal left-closed intervals so that the start time is contained, but not the end time. + :param reducer: the "child callback": + the name of a single openEO process, + or a callback function as discussed in :ref:`callbackfunctions`, + or a :py:class:`UDF ` instance. + + The callback should correspond to a process that + receives an array of numerical values + and returns a single numerical value. + For example: + + - ``"mean"`` (string) + - :py:func:`absolute ` (:ref:`predefined openEO process function `) + - ``lambda data: data.min()`` (function or lambda) + + :param labels: Labels for the intervals. The number of labels and the number of groups need to be equal. + :param dimension: The temporal dimension for aggregation. All data along the dimension will be passed through the specified reducer. 
If the dimension is not set, the data cube is expected to only have one temporal dimension. + :param context: Additional data to be passed to the reducer. Not set by default. + + :return: A :py:class:`DataCube` containing a result for each time window + """ + return self.process( + process_id="aggregate_temporal", + arguments=dict_no_none( + data=THIS, + intervals=intervals, + labels=labels, + dimension=dimension, + reducer=build_child_callback(reducer, parent_parameters=["data"]), + context=context, + ), + ) + + @openeo_process + def aggregate_temporal_period( + self, + period: str, + reducer: Union[str, PGNode, typing.Callable], + dimension: Optional[str] = None, + context: Optional[Dict] = None, + ) -> DataCube: + """ + Computes a temporal aggregation based on calendar hierarchies such as years, months or seasons. For other calendar hierarchies aggregate_temporal can be used. + + For each interval, all data along the dimension will be passed through the reducer. + + If the dimension is not set or is set to null, the data cube is expected to only have one temporal dimension. + + The period argument specifies the time intervals to aggregate. The following pre-defined values are available: + + - hour: Hour of the day + - day: Day of the year + - week: Week of the year + - dekad: Ten day periods, counted per year with three periods per month (day 1 - 10, 11 - 20 and 21 - end of month). The third dekad of the month can range from 8 to 11 days. For example, the fourth dekad is Feb, 1 - Feb, 10 each year. + - month: Month of the year + - season: Three month periods of the calendar seasons (December - February, March - May, June - August, September - November). + - tropical-season: Six month periods of the tropical seasons (November - April, May - October). + - year: Proleptic years + - decade: Ten year periods (0-to-9 decade), from a year ending in a 0 to the next year ending in a 9. 
+ - decade-ad: Ten year periods (1-to-0 decade) better aligned with the Anno Domini (AD) calendar era, from a year ending in a 1 to the next year ending in a 0. + + + :param period: The period of the time intervals to aggregate. + :param reducer: A reducer to be applied on all values along the specified dimension. The reducer must be a callable process (or a set processes) that accepts an array and computes a single return value of the same type as the input values, for example median. + :param dimension: The temporal dimension for aggregation. All data along the dimension will be passed through the specified reducer. If the dimension is not set, the data cube is expected to only have one temporal dimension. + :param context: Additional data to be passed to the reducer. + + :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference system and resolution) remain unchanged. + """ + return self.process( + process_id="aggregate_temporal_period", + arguments=dict_no_none( + data=THIS, + period=period, + dimension=dimension, + reducer=build_child_callback(reducer, parent_parameters=["data"]), + context=context, + ), + ) + + @openeo_process + def ndvi(self, nir: str = None, red: str = None, target_band: str = None) -> DataCube: + """ + Normalized Difference Vegetation Index (NDVI) + + :param nir: (optional) name of NIR band + :param red: (optional) name of red band + :param target_band: (optional) name of the newly created band + + :return: a DataCube instance + """ + if self.metadata is None: + metadata = None + elif target_band is None: + metadata = self.metadata.reduce_dimension(self.metadata.band_dimension.name) + else: + # TODO: first drop "bands" dim and re-add it with single "ndvi" band + metadata = self.metadata.append_band(Band(name=target_band, common_name="ndvi")) + return self.process( + process_id="ndvi", + arguments=dict_no_none( + data=THIS, nir=nir, red=red, target_band=target_band + ), + metadata=metadata, + ) + 
+ @openeo_process + def rename_dimension(self, source: str, target: str): + """ + Renames a dimension in the data cube while preserving all other properties. + + :param source: The current name of the dimension. Fails with a DimensionNotAvailable error if the specified dimension does not exist. + :param target: A new Name for the dimension. Fails with a DimensionExists error if a dimension with the specified name exists. + + :return: A new datacube with the dimension renamed. + """ + if self._do_metadata_normalization() and target in self.metadata.dimension_names(): + raise ValueError('Target dimension name conflicts with existing dimension: %s.' % target) + return self.process( + process_id="rename_dimension", + arguments=dict_no_none( + data=THIS, + source=self._assert_valid_dimension_name(source), + target=target, + ), + metadata=self.metadata.rename_dimension(source, target) if self.metadata else None, + ) + + @openeo_process + def rename_labels(self, dimension: str, target: list, source: list = None) -> DataCube: + """ + Renames the labels of the specified dimension in the data cube from source to target. + + :param dimension: Dimension name + :param target: The new names for the labels. + :param source: The names of the labels as they are currently in the data cube. + + :return: An DataCube instance + """ + return self.process( + process_id="rename_labels", + arguments=dict_no_none( + data=THIS, + dimension=self._assert_valid_dimension_name(dimension), + target=target, + source=source, + ), + metadata=self.metadata.rename_labels(dimension, target, source) if self.metadata else None, + ) + + @openeo_process(mode="apply") + def linear_scale_range(self, input_min, input_max, output_min, output_max) -> DataCube: + """ + Performs a linear transformation between the input and output range. 
+ + The given number in x is clipped to the bounds specified in inputMin and inputMax so that the underlying formula + + ((x - inputMin) / (inputMax - inputMin)) * (outputMax - outputMin) + outputMin + + never returns any value lower than outputMin or greater than outputMax. + + Potential use case include scaling values to the 8-bit range (0 - 255) often used for numeric representation of + values in one of the channels of the RGB colour model or calculating percentages (0 - 100). + + The no-data value null is passed through and therefore gets propagated. + + :param input_min: Minimum input value + :param input_max: Maximum input value + :param output_min: Minimum value of the desired output range. + :param output_max: Maximum value of the desired output range. + :return: a DataCube instance + """ + + return self.apply(lambda x: x.linear_scale_range(input_min, input_max, output_min, output_max)) + + @openeo_process + def mask(self, mask: DataCube = None, replacement=None) -> DataCube: + """ + Applies a mask to a raster data cube. To apply a vector mask use `mask_polygon`. + + A mask is a raster data cube for which corresponding pixels among `data` and `mask` + are compared and those pixels in `data` are replaced whose pixels in `mask` are non-zero + (for numbers) or true (for boolean values). + The pixel values are replaced with the value specified for `replacement`, + which defaults to null (no data). + + :param mask: the raster mask + :param replacement: the value to replace the masked pixels with + """ + return self.process( + process_id="mask", + arguments=dict_no_none(data=self, mask=mask, replacement=replacement), + ) + + @openeo_process + def mask_polygon( + self, + mask: Union[shapely.geometry.base.BaseGeometry, dict, str, pathlib.Path, Parameter, VectorCube], + srs: str = None, + replacement=None, + inside: bool = None, + ) -> DataCube: + """ + Applies a polygon mask to a raster data cube. To apply a raster mask use `mask`. 
+ + All pixels for which the point at the pixel center does not intersect with any + polygon (as defined in the Simple Features standard by the OGC) are replaced. + This behaviour can be inverted by setting the parameter `inside` to true. + + The pixel values are replaced with the value specified for `replacement`, + which defaults to `no data`. + + :param mask: The geometry to mask with.an be provided in different ways: + + - a shapely geometry + - a GeoJSON-style dictionary, + - a public URL to the geometries in a vector format that is supported by the backend + (also see :py:func:`Connection.list_file_formats() `), + e.g. GeoJSON, GeoParquet, etc. + A ``load_url`` process will automatically be added to the process graph. + - a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file, + which will be loaded automatically to get the geometries as GeoJSON construct. + - a :py:class:`~openeo.rest.vectorcube.VectorCube` instance. + - a :py:class:`~openeo.api.process.Parameter` instance. + + :param srs: The spatial reference system of the provided polygon. + By default longitude-latitude (EPSG:4326) is assumed. + + .. note:: this ``srs`` argument is a non-standard/experimental feature, only supported by specific back-ends. + See https://github.com/Open-EO/openeo-processes/issues/235 for details. + :param replacement: the value to replace the masked pixels with + + .. versionchanged:: 0.36.0 + Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process. + + .. versionchanged:: 0.36.0 + Support for passing a backend-side path as ``geometries`` argument was removed + (also see :ref:`legacy_read_vector`). + Instead, it's possible to provide a client-side path to a GeoJSON file + (which will be loaded client-side to get the geometries as GeoJSON construct). 
+ """ + valid_geojson_types = ["Polygon", "MultiPolygon", "GeometryCollection", "Feature", "FeatureCollection"] + mask = self._get_geometry_argument(mask, valid_geojson_types=valid_geojson_types, crs=srs) + return self.process( + process_id="mask_polygon", + arguments=dict_no_none( + data=THIS, + mask=mask, + replacement=replacement, + inside=inside + ) + ) + + @openeo_process + def merge_cubes( + self, + other: DataCube, + overlap_resolver: Union[str, PGNode, typing.Callable] = None, + context: Optional[dict] = None, + ) -> DataCube: + """ + Merging two data cubes + + The data cubes have to be compatible. A merge operation without overlap should be reversible with (a set of) filter operations for each of the two cubes. The process performs the join on overlapping dimensions, with the same name and type. + An overlapping dimension has the same name, type, reference system and resolution in both dimensions, but can have different labels. One of the dimensions can have different labels, for all other dimensions the labels must be equal. If data overlaps, the parameter overlap_resolver must be specified to resolve the overlap. + + Examples for merging two data cubes: + + #. Data cubes with the dimensions x, y, t and bands have the same dimension labels in x,y and t, but the labels for the dimension bands are B1 and B2 for the first cube and B3 and B4. An overlap resolver is not needed. The merged data cube has the dimensions x, y, t and bands and the dimension bands has four dimension labels: B1, B2, B3, B4. + #. Data cubes with the dimensions x, y, t and bands have the same dimension labels in x,y and t, but the labels for the dimension bands are B1 and B2 for the first data cube and B2 and B3 for the second. An overlap resolver is required to resolve overlap in band B2. The merged data cube has the dimensions x, y, t and bands and the dimension bands has three dimension labels: B1, B2, B3. + #. 
Data cubes with the dimensions x, y and t have the same dimension labels in x,y and t. There are two options: + * Keep the overlapping values separately in the merged data cube: An overlap resolver is not needed, but for each data cube you need to add a new dimension using add_dimension. The new dimensions must be equal, except that the labels for the new dimensions must differ by name. The merged data cube has the same dimensions and labels as the original data cubes, plus the dimension added with add_dimension, which has the two dimension labels after the merge. + * Combine the overlapping values into a single value: An overlap resolver is required to resolve the overlap for all pixels. The merged data cube has the same dimensions and labels as the original data cubes, but all pixel values have been processed by the overlap resolver. + #. Merging a data cube with dimensions x, y, t with another cube with dimensions x, y will join on the x, y dimension, so the lower dimension cube is merged with each time step in the higher dimensional cube. This can for instance be used to apply a digital elevation model to a spatiotemporal data cube. + + :param other: The data cube to merge with. + :param overlap_resolver: A reduction operator that resolves the conflict if the data overlaps. The reducer must return a value of the same data type as the input values are. The reduction operator may be a single process such as multiply or consist of multiple sub-processes. null (the default) can be specified if no overlap resolver is required. + :param context: Additional data to be passed to the process. + + :return: The merged data cube. 
+ """ + arguments = {"cube1": self, "cube2": other} + if overlap_resolver: + arguments["overlap_resolver"] = build_child_callback(overlap_resolver, parent_parameters=["x", "y"]) + if ( + self.metadata + and self.metadata.has_band_dimension() + and isinstance(other, DataCube) + and other.metadata + and other.metadata.has_band_dimension() + ): + # Minimal client side metadata merging + merged_metadata = self.metadata + for b in other.metadata.band_dimension.bands: + if b not in merged_metadata.bands: + merged_metadata = merged_metadata.append_band(b) + else: + merged_metadata = None + # Overlapping bands without overlap resolver will give an error in the backend + if context: + arguments["context"] = context + return self.process(process_id="merge_cubes", arguments=arguments, metadata=merged_metadata) + + merge = legacy_alias(merge_cubes, name="merge", since="0.4.6") + + @openeo_process + def apply_kernel( + self, kernel: Union[np.ndarray, List[List[float]]], factor=1.0, border=0, + replace_invalid=0 + ) -> DataCube: + """ + Applies a focal operation based on a weighted kernel to each value of the specified dimensions in the data cube. + + The border parameter determines how the data is extended when the kernel overlaps with the borders. + The following options are available: + + * numeric value - fill with a user-defined constant number n: nnnnnn|abcdefgh|nnnnnn (default, with n = 0) + * replicate - repeat the value from the pixel at the border: aaaaaa|abcdefgh|hhhhhh + * reflect - mirror/reflect from the border: fedcba|abcdefgh|hgfedc + * reflect_pixel - mirror/reflect from the center of the pixel at the border: gfedcb|abcdefgh|gfedcb + * wrap - repeat/wrap the image: cdefgh|abcdefgh|abcdef + + + :param kernel: The kernel to be applied on the data cube. The kernel has to be as many dimensions as the data cube has dimensions. + :param factor: A factor that is multiplied to each value computed by the focal operation. 
This is basically a shortcut for explicitly multiplying each value by a factor afterwards, which is often required for some kernel-based algorithms such as the Gaussian blur. + :param border: Determines how the data is extended when the kernel overlaps with the borders. Defaults to fill the border with zeroes. + :param replace_invalid: This parameter specifies the value to replace non-numerical or infinite numerical values with. By default, those values are replaced with zeroes. + :return: A data cube with the newly computed values. The resolution, cardinality and the number of dimensions are the same as for the original data cube. + """ + return self.process('apply_kernel', { + 'data': THIS, + 'kernel': kernel.tolist() if isinstance(kernel, np.ndarray) else kernel, + 'factor': factor, + 'border': border, + 'replace_invalid': replace_invalid + }) + + @openeo_process + def resolution_merge( + self, high_resolution_bands: List[str], low_resolution_bands: List[str], method: str = None + ) -> DataCube: + """ + Resolution merging algorithms try to improve the spatial resolution of lower resolution bands + (e.g. Sentinel-2 20M) based on higher resolution bands. (e.g. Sentinel-2 10M). + + External references: + + `Pansharpening explained `_ + + `Example publication: 'Improving the Spatial Resolution of Land Surface Phenology by Fusing Medium- and + Coarse-Resolution Inputs' `_ + + .. warning:: experimental process: not generally supported, API subject to change. + + :param high_resolution_bands: A list of band names to use as 'high-resolution' band. Either the unique band name (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in bands). If unique band name and common name conflict, the unique band name has higher priority. The order of the specified array defines the order of the bands in the data cube. If multiple bands match a common name, all matched bands are included in the original order. 
These bands will remain unmodified. + :param low_resolution_bands: A list of band names for which the spatial resolution should be increased. Either the unique band name (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in bands). If unique band name and common name conflict, the unique band name has higher priority. The order of the specified array defines the order of the bands in the data cube. If multiple bands match a common name, all matched bands are included in the original order. These bands will be modified by the process. + :param method: The method to use. The supported algorithms can vary between back-ends. Set to `null` (the default) to allow the back-end to choose, which will improve portability, but reduce reproducibility.. + :return: A datacube with the same bands and metadata as the input, but algorithmically increased spatial resolution for the selected bands. + """ + return self.process('resolution_merge', { + 'data': THIS, + 'high_resolution_bands': high_resolution_bands, + 'low_resolution_bands': low_resolution_bands, + 'method': method, + + }) + + def raster_to_vector(self) -> VectorCube: + """ + Converts this raster data cube into a :py:class:`~openeo.rest.vectorcube.VectorCube`. + The bounding polygon of homogenous areas of pixels is constructed. + + .. warning:: experimental process: not generally supported, API subject to change. + + :return: a :py:class:`~openeo.rest.vectorcube.VectorCube` + """ + pg_node = PGNode(process_id="raster_to_vector", arguments={"data": self}) + return VectorCube(pg_node, connection=self._connection) + + ####VIEW methods ####### + + @deprecated( + "Use :py:meth:`aggregate_spatial` with reducer ``'mean'``.", version="0.10.0" + ) + def polygonal_mean_timeseries( + self, polygon: Union[Polygon, MultiPolygon, str] + ) -> VectorCube: + """ + Extract a mean time series for the given (multi)polygon. 
Its points are + expected to be in the EPSG:4326 coordinate + reference system. + + :param polygon: The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file + """ + return self.aggregate_spatial(geometries=polygon, reducer="mean") + + @deprecated( + "Use :py:meth:`aggregate_spatial` with reducer ``'histogram'``.", + version="0.10.0", + ) + def polygonal_histogram_timeseries( + self, polygon: Union[Polygon, MultiPolygon, str] + ) -> VectorCube: + """ + Extract a histogram time series for the given (multi)polygon. Its points are + expected to be in the EPSG:4326 coordinate + reference system. + + :param polygon: The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file + """ + return self.aggregate_spatial(geometries=polygon, reducer="histogram") + + @deprecated( + "Use :py:meth:`aggregate_spatial` with reducer ``'median'``.", version="0.10.0" + ) + def polygonal_median_timeseries( + self, polygon: Union[Polygon, MultiPolygon, str] + ) -> VectorCube: + """ + Extract a median time series for the given (multi)polygon. Its points are + expected to be in the EPSG:4326 coordinate + reference system. + + :param polygon: The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file + """ + return self.aggregate_spatial(geometries=polygon, reducer="median") + + @deprecated( + "Use :py:meth:`aggregate_spatial` with reducer ``'sd'``.", version="0.10.0" + ) + def polygonal_standarddeviation_timeseries( + self, polygon: Union[Polygon, MultiPolygon, str] + ) -> VectorCube: + """ + Extract a time series of standard deviations for the given (multi)polygon. Its points are + expected to be in the EPSG:4326 coordinate + reference system. 
+ + :param polygon: The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file + """ + return self.aggregate_spatial(geometries=polygon, reducer="sd") + + @openeo_process + def ard_surface_reflectance( + self, atmospheric_correction_method: str, cloud_detection_method: str, elevation_model: str = None, + atmospheric_correction_options: dict = None, cloud_detection_options: dict = None, + ) -> DataCube: + """ + Computes CARD4L compliant surface reflectance values from optical input. + + :param atmospheric_correction_method: The atmospheric correction method to use. + :param cloud_detection_method: The cloud detection method to use. + :param elevation_model: The digital elevation model to use, leave empty to allow the back-end to make a suitable choice. + :param atmospheric_correction_options: Proprietary options for the atmospheric correction method. + :param cloud_detection_options: Proprietary options for the cloud detection method. + :return: Data cube containing bottom of atmosphere reflectances with atmospheric disturbances like clouds and cloud shadows removed. The data returned is CARD4L compliant and contains metadata. + """ + return self.process('ard_surface_reflectance', { + 'data': THIS, + 'atmospheric_correction_method': atmospheric_correction_method, + 'cloud_detection_method': cloud_detection_method, + 'elevation_model': elevation_model, + 'atmospheric_correction_options': atmospheric_correction_options or {}, + 'cloud_detection_options': cloud_detection_options or {}, + }) + + @openeo_process + def atmospheric_correction(self, method: str = None, elevation_model: str = None, options: dict = None) -> DataCube: + """ + Applies an atmospheric correction that converts top of atmosphere reflectance values into bottom of atmosphere/top of canopy reflectance values. + + Note that multiple atmospheric methods exist, but may not be supported by all backends. 
The method parameter gives + you the option of requiring a specific method, but this may result in an error if the backend does not support it. + + :param method: The atmospheric correction method to use. To get reproducible results, you have to set a specific method. Set to `null` to allow the back-end to choose, which will improve portability, but reduce reproducibility as you *may* get different results if you run the processes multiple times. + :param elevation_model: The digital elevation model to use, leave empty to allow the back-end to make a suitable choice. + :param options: Proprietary options for the atmospheric correction method. + :return: datacube with bottom of atmosphere reflectances + """ + return self.process('atmospheric_correction', { + 'data': THIS, + 'method': method, + 'elevation_model': elevation_model, + 'options': options or {}, + }) + + @openeo_process + def save_result( + self, + format: str = _DEFAULT_RASTER_FORMAT, + options: Optional[dict] = None, + ) -> DataCube: + if self._connection: + formats = set(self._connection.list_output_formats().keys()) + # TODO: map format to correct casing too? + if format.lower() not in {f.lower() for f in formats}: + raise ValueError("Invalid format {f!r}. Should be one of {s}".format(f=format, s=formats)) + return self.process( + process_id="save_result", + arguments={ + "data": THIS, + "format": format, + # TODO: leave out options if unset? + "options": options or {} + } + ) + + def download( + self, + outputfile: Optional[Union[str, pathlib.Path]] = None, + format: Optional[str] = None, + options: Optional[dict] = None, + *, + validate: Optional[bool] = None, + auto_add_save_result: bool = True, + additional: Optional[dict] = None, + job_options: Optional[dict] = None, + ) -> Union[None, bytes]: + """ + Execute synchronously and download the raster data cube, e.g. as GeoTIFF. + + If outputfile is provided, the result is stored on disk locally, otherwise, a bytes object is returned. 
+ The bytes object can be passed on to a suitable decoder for decoding. + + :param outputfile: Optional, an output file if the result needs to be stored on disk. + :param format: Optional, an output format supported by the backend. + :param options: Optional, file format options + :param validate: Optional toggle to enable/prevent validation of the process graphs before execution + (overruling the connection's ``auto_validate`` setting). + :param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet. + :param additional: additional (top-level) properties to set in the request body + :param job_options: dictionary of job options to pass to the backend + (under top-level property "job_options") + + :return: None if the result is stored to disk, or a bytes object returned by the backend. + + .. versionchanged:: 0.32.0 + Added ``auto_add_save_result`` option + + .. versionadded:: 0.36.0 + Added arguments ``additional`` and ``job_options``. + """ + # TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ... + cube = self + if auto_add_save_result: + cube = _ensure_save_result( + cube=cube, + format=format, + options=options, + weak_format=guess_format(outputfile) if outputfile else None, + default_format=self._DEFAULT_RASTER_FORMAT, + method="DataCube.download()", + ) + return self._connection.download( + cube.flat_graph(), outputfile, validate=validate, additional=additional, job_options=job_options + ) + + def validate(self) -> List[dict]: + """ + Validate a process graph without executing it. 
+ + :return: list of errors (dictionaries with "code" and "message" fields) + """ + return self._connection.validate_process_graph(self.flat_graph()) + + def tiled_viewing_service(self, type: str, **kwargs) -> Service: + return self._connection.create_service(self.flat_graph(), type=type, **kwargs) + + def _get_spatial_extent_from_load_collection(self): + pg = self.flat_graph() + for node in pg: + if pg[node]["process_id"] == "load_collection": + if "spatial_extent" in pg[node]["arguments"] and all( + cd in pg[node]["arguments"]["spatial_extent"] for cd in ["east", "west", "south", "north"] + ): + return pg[node]["arguments"]["spatial_extent"] + return None + + def preview( + self, + center: Union[Iterable, None] = None, + zoom: Union[int, None] = None, + ): + """ + Creates a service with the process graph and displays a map widget. Only supports XYZ. + + :param center: (optional) Map center. Default is (0,0). + :param zoom: (optional) Zoom level of the map. Default is 1. + + :return: ipyleaflet Map object and the displayed Service + + .. warning:: experimental feature, subject to change. + .. versionadded:: 0.19.0 + """ + if "XYZ" not in self.connection.list_service_types(): + raise OpenEoClientException("Backend does not support service type 'XYZ'.") + + if not in_jupyter_context(): + raise Exception("On-demand preview only supported in Jupyter notebooks!") + try: + import ipyleaflet + except ImportError: + raise Exception( + "Additional modules must be installed for on-demand preview. Run `pip install openeo[jupyter]` or refer to the documentation." 
+ ) + + service = self.tiled_viewing_service("XYZ") + service_metadata = service.describe_service() + + m = ipyleaflet.Map( + center=center or (0, 0), + zoom=zoom or 1, + scroll_wheel_zoom=True, + basemap=ipyleaflet.basemaps.OpenStreetMap.Mapnik, + ) + service_layer = ipyleaflet.TileLayer(url=service_metadata["url"]) + m.add(service_layer) + + if center is None and zoom is None: + spatial_extent = self._get_spatial_extent_from_load_collection() + if spatial_extent is not None: + m.fit_bounds( + [ + [spatial_extent["south"], spatial_extent["west"]], + [spatial_extent["north"], spatial_extent["east"]], + ] + ) + + class Preview: + """ + On-demand preview instance holding the associated XYZ service and ipyleaflet Map + """ + + def __init__(self, service: Service, ipyleaflet_map: ipyleaflet.Map): + self.service = service + self.map = ipyleaflet_map + + def _repr_html_(self): + from IPython.display import display + + display(self.map) + + def delete_service(self): + self.service.delete_service() + + return Preview(service, m) + + def execute_batch( + self, + outputfile: Optional[Union[str, pathlib.Path]] = None, + out_format: Optional[str] = None, + *, + title: Optional[str] = None, + description: Optional[str] = None, + plan: Optional[str] = None, + budget: Optional[float] = None, + print: typing.Callable[[str], None] = print, + max_poll_interval: float = 60, + connection_retry_interval: float = 30, + additional: Optional[dict] = None, + job_options: Optional[dict] = None, + validate: Optional[bool] = None, + auto_add_save_result: bool = True, + # TODO: deprecate `format_options` as keyword arguments + **format_options, + ) -> BatchJob: + """ + Evaluate the process graph by creating a batch job, and retrieving the results when it is finished. + This method is mostly recommended if the batch job is expected to run in a reasonable amount of time. + + For very long-running jobs, you probably do not want to keep the client running. 
+ + :param outputfile: The path of a file to which a result can be written + :param out_format: (optional) File format to use for the job result. + :param additional: additional (top-level) properties to set in the request body + :param job_options: dictionary of job options to pass to the backend + (under top-level property "job_options") + :param validate: Optional toggle to enable/prevent validation of the process graphs before execution + (overruling the connection's ``auto_validate`` setting). + :param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet. + + .. versionchanged:: 0.32.0 + Added ``auto_add_save_result`` option + + .. versionadded:: 0.36.0 + Added argument ``additional``. + """ + # TODO: start showing deprecation warnings about these inconsistent argument names + if "format" in format_options and not out_format: + out_format = format_options["format"] # align with 'download' call arg name + + # TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ... 
+ cube = self + if auto_add_save_result: + cube = _ensure_save_result( + cube=cube, + format=out_format, + options=format_options, + weak_format=guess_format(outputfile) if outputfile else None, + default_format=self._DEFAULT_RASTER_FORMAT, + method="DataCube.execute_batch()", + ) + + job = cube.create_job( + title=title, + description=description, + plan=plan, + budget=budget, + additional=additional, + job_options=job_options, + validate=validate, + auto_add_save_result=False, + ) + return job.run_synchronous( + outputfile=outputfile, + print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval + ) + + def create_job( + self, + out_format: Optional[str] = None, + *, + title: Optional[str] = None, + description: Optional[str] = None, + plan: Optional[str] = None, + budget: Optional[float] = None, + additional: Optional[dict] = None, + job_options: Optional[dict] = None, + validate: Optional[bool] = None, + auto_add_save_result: bool = True, + # TODO: avoid `format_options` as keyword arguments + **format_options, + ) -> BatchJob: + """ + Sends the datacube's process graph as a batch job to the back-end + and return a :py:class:`~openeo.rest.job.BatchJob` instance. + + Note that the batch job will just be created at the back-end, + it still needs to be started and tracked explicitly. + Use :py:meth:`execute_batch` instead to have the openEO Python client take care of that job management. + + :param out_format: output file format. + :param title: job title + :param description: job description + :param plan: The billing plan to process and charge the job with + :param budget: Maximum budget to be spent on executing the job. + Note that some backends do not honor this limit. 
+ :param additional: additional (top-level) properties to set in the request body + :param job_options: dictionary of job options to pass to the backend + (under top-level property "job_options") + :param validate: Optional toggle to enable/prevent validation of the process graphs before execution + (overruling the connection's ``auto_validate`` setting). + :param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet. + + :return: Created job. + + .. versionadded:: 0.32.0 + Added ``auto_add_save_result`` option + + .. versionadded:: 0.36.0 + Added ``additional`` argument. + """ + # TODO: add option to also automatically start the job? + # TODO: avoid using all kwargs as format_options + # TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ... + cube = self + if auto_add_save_result: + cube = _ensure_save_result( + cube=cube, + format=out_format, + options=format_options or None, + default_format=self._DEFAULT_RASTER_FORMAT, + method="DataCube.create_job()", + ) + return self._connection.create_job( + process_graph=cube.flat_graph(), + title=title, + description=description, + plan=plan, + budget=budget, + validate=validate, + additional=additional, + job_options=job_options, + ) + + send_job = legacy_alias(create_job, name="send_job", since="0.10.0") + + def save_user_defined_process( + self, + user_defined_process_id: str, + public: bool = False, + summary: Optional[str] = None, + description: Optional[str] = None, + returns: Optional[dict] = None, + categories: Optional[List[str]] = None, + examples: Optional[List[dict]] = None, + links: Optional[List[dict]] = None, + ) -> RESTUserDefinedProcess: + """ + Saves this process graph in the backend as a user-defined process for the authenticated user. + + :param user_defined_process_id: unique identifier for the process + :param public: visible to other users? + :param summary: A short summary of what the process does. 
+ :param description: Detailed description to explain the entity. CommonMark 0.29 syntax MAY be used for rich text representation. + :param returns: Description and schema of the return value. + :param categories: A list of categories. + :param examples: A list of examples. + :param links: A list of links. + :return: a RESTUserDefinedProcess instance + """ + return self._connection.save_user_defined_process( + user_defined_process_id=user_defined_process_id, + process_graph=self.flat_graph(), public=public, summary=summary, description=description, + returns=returns, categories=categories, examples=examples, links=links, + ) + + def execute(self, *, validate: Optional[bool] = None, auto_decode: bool = True) -> Union[dict, requests.Response]: + """ + Execute a process graph synchronously and return the result. If the result is a JSON object, it will be parsed. + + :param validate: Optional toggle to enable/prevent validation of the process graphs before execution + (overruling the connection's ``auto_validate`` setting). + :param auto_decode: Boolean flag to enable/disable automatic JSON decoding of the response. Defaults to True. + + :return: parsed JSON response as a dict if auto_decode is True, otherwise response object + """ + # TODO: deprecated this. It's ill-defined how to "execute" a data cube without downloading it. 
+ return self._connection.execute(self.flat_graph(), validate=validate, auto_decode=auto_decode) + + @staticmethod + @deprecated(reason="Use :py:func:`openeo.udf.run_code.execute_local_udf` instead", version="0.7.0") + def execute_local_udf(udf: str, datacube: Union[str, 'xarray.DataArray', 'XarrayDataCube'] = None, fmt='netcdf'): + import openeo.udf.run_code + return openeo.udf.run_code.execute_local_udf(udf=udf, datacube=datacube, fmt=fmt) + + @openeo_process + def ard_normalized_radar_backscatter( + self, elevation_model: str = None, contributing_area=False, + ellipsoid_incidence_angle: bool = False, noise_removal: bool = True + ) -> DataCube: + """ + Computes CARD4L compliant backscatter (gamma0) from SAR input. + This method is a variant of :py:meth:`~openeo.rest.datacube.DataCube.sar_backscatter`, + with restricted parameters to generate backscatter according to CARD4L specifications. + + Note that backscatter computation may require instrument specific metadata that is tightly coupled to the original SAR products. + As a result, this process may only work in combination with loading data from specific collections, not with general data cubes. + + :param elevation_model: The digital elevation model to use. Set to None (the default) to allow the back-end to choose, which will improve portability, but reduce reproducibility. + :param contributing_area: If set to `true`, a DEM-based local contributing area band named `contributing_area` + is added. The values are given in square meters. + :param ellipsoid_incidence_angle: If set to `True`, an ellipsoidal incidence angle band named `ellipsoid_incidence_angle` is added. The values are given in degrees. + :param noise_removal: If set to `false`, no noise removal is applied. Defaults to `True`, which removes noise. + + :return: Backscatter values expressed as gamma0. The data returned is CARD4L compliant and contains metadata. By default, the backscatter values are given in linear scale. 
+ """ + return self.process(process_id="ard_normalized_radar_backscatter", arguments={ + "data": THIS, + "elevation_model": elevation_model, + "contributing_area": contributing_area, + "ellipsoid_incidence_angle": ellipsoid_incidence_angle, + "noise_removal": noise_removal + }) + + @openeo_process + def sar_backscatter( + self, + coefficient: Union[str, None] = "gamma0-terrain", + elevation_model: Union[str, None] = None, + mask: bool = False, + contributing_area: bool = False, + local_incidence_angle: bool = False, + ellipsoid_incidence_angle: bool = False, + noise_removal: bool = True, + options: Optional[dict] = None + ) -> DataCube: + """ + Computes backscatter from SAR input. + + Note that backscatter computation may require instrument specific metadata that is tightly coupled to the + original SAR products. As a result, this process may only work in combination with loading data from + specific collections, not with general data cubes. + + :param coefficient: Select the radiometric correction coefficient. + The following options are available: + + - `"beta0"`: radar brightness + - `"sigma0-ellipsoid"`: ground area computed with ellipsoid earth model + - `"sigma0-terrain"`: ground area computed with terrain earth model + - `"gamma0-ellipsoid"`: ground area computed with ellipsoid earth model in sensor line of sight + - `"gamma0-terrain"`: ground area computed with terrain earth model in sensor line of sight (default) + - `None`: non-normalized backscatter + :param elevation_model: The digital elevation model to use. Set to `None` (the default) to allow + the back-end to choose, which will improve portability, but reduce reproducibility. + :param mask: If set to `true`, a data mask is added to the bands with the name `mask`. + It indicates which values are valid (1), invalid (0) or contain no-data (null). + :param contributing_area: If set to `true`, a DEM-based local contributing area band named `contributing_area` + is added. 
The values are given in square meters. + :param local_incidence_angle: If set to `true`, a DEM-based local incidence angle band named + `local_incidence_angle` is added. The values are given in degrees. + :param ellipsoid_incidence_angle: If set to `true`, an ellipsoidal incidence angle band named + `ellipsoid_incidence_angle` is added. The values are given in degrees. + :param noise_removal: If set to `false`, no noise removal is applied. Defaults to `true`, which removes noise. + :param options: dictionary with additional (backend-specific) options. + :return: + + .. versionadded:: 0.4.9 + .. versionchanged:: 0.4.10 replace `orthorectify` and `rtc` arguments with `coefficient`. + """ + coefficient_options = [ + "beta0", "sigma0-ellipsoid", "sigma0-terrain", "gamma0-ellipsoid", "gamma0-terrain", None + ] + if coefficient not in coefficient_options: + raise OpenEoClientException("Invalid `sar_backscatter` coefficient {c!r}. Should be one of {o}".format( + c=coefficient, o=coefficient_options + )) + arguments = { + "data": THIS, + "coefficient": coefficient, + "elevation_model": elevation_model, + "mask": mask, + "contributing_area": contributing_area, + "local_incidence_angle": local_incidence_angle, + "ellipsoid_incidence_angle": ellipsoid_incidence_angle, + "noise_removal": noise_removal, + } + if options: + arguments["options"] = options + return self.process(process_id="sar_backscatter", arguments=arguments) + + @openeo_process + def fit_curve(self, parameters: list, function: Union[str, PGNode, typing.Callable], dimension: str): + """ + Use non-linear least squares to fit a model function `y = f(x, parameters)` to data. + + The process throws an `InvalidValues` exception if invalid values are encountered. + Invalid values are finite numbers (see also ``is_valid()``). + + .. warning:: experimental process: not generally supported, API subject to change. 
+ https://github.com/Open-EO/openeo-processes/pull/240 + + :param parameters: + :param function: "child callback" function, see :ref:`callbackfunctions` + :param dimension: + """ + # TODO: does this return a `DataCube`? Shouldn't it just return an array (wrapper)? + return self.process( + process_id="fit_curve", + arguments={ + "data": THIS, + "parameters": parameters, + "function": build_child_callback(function, parent_parameters=["x", "parameters"]), + "dimension": dimension, + }, + ) + + @openeo_process + def predict_curve( + self, parameters: list, function: Union[str, PGNode, typing.Callable], dimension: str, + labels=None + ): + """ + Predict values using a model function and pre-computed parameters. + + .. warning:: experimental process: not generally supported, API subject to change. + https://github.com/Open-EO/openeo-processes/pull/240 + + :param parameters: + :param function: "child callback" function, see :ref:`callbackfunctions` + :param dimension: + """ + return self.process( + process_id="predict_curve", + arguments={ + "data": THIS, + "parameters": parameters, + "function": build_child_callback(function, parent_parameters=["x", "parameters"]), + "dimension": dimension, + "labels": labels, + }, + ) + + @openeo_process(mode="reduce_dimension") + def predict_random_forest(self, model: Union[str, BatchJob, MlModel], dimension: str = "bands"): + """ + Apply ``reduce_dimension`` process with a ``predict_random_forest`` reducer. + + :param model: a reference to a trained model, one of + + - a :py:class:`~openeo.rest.mlmodel.MlModel` instance (e.g. loaded from :py:meth:`Connection.load_ml_model`) + - a :py:class:`~openeo.rest.job.BatchJob` instance of a batch job that saved a single random forest model + - a job id (``str``) of a batch job that saved a single random forest model + - a STAC item URL (``str``) to load the random forest from. + (The STAC Item must implement the `ml-model` extension.) 
+ :param dimension: dimension along which to apply the ``reduce_dimension`` process. + + .. versionadded:: 0.10.0 + """ + if not isinstance(model, MlModel): + model = MlModel.load_ml_model(connection=self.connection, id=model) + reducer = PGNode( + process_id="predict_random_forest", data={"from_parameter": "data"}, model={"from_parameter": "context"} + ) + return self.reduce_dimension(dimension=dimension, reducer=reducer, context=model) + + @openeo_process + def dimension_labels(self, dimension: str) -> DataCube: + """ + Gives all labels for a dimension in the data cube. The labels have the same order as in the data cube. + + :param dimension: The name of the dimension to get the labels for. + """ + if self._do_metadata_normalization(): + dimension_names = self.metadata.dimension_names() + if dimension_names and dimension not in dimension_names: + raise ValueError(f"Invalid dimension name {dimension!r}, should be one of {dimension_names}") + return self.process(process_id="dimension_labels", arguments={"data": THIS, "dimension": dimension}) + + @openeo_process + def flatten_dimensions(self, dimensions: List[str], target_dimension: str, label_separator: Optional[str] = None): + """ + Combines multiple given dimensions into a single dimension by flattening the values + and merging the dimension labels with the given `label_separator`. Non-string dimension labels will + be converted to strings. This process is the opposite of the process :py:meth:`unflatten_dimension()` + but executing both processes subsequently doesn't necessarily create a data cube that + is equal to the original data cube. + + :param dimensions: The names of the dimension to combine. + :param target_dimension: The name of a target dimension with a single dimension label to replace. + :param label_separator: The string that will be used as a separator for the concatenated dimension labels. + :return: A data cube with the new shape. + + .. 
warning:: experimental process: not generally supported, API subject to change. + .. versionadded:: 0.10.0 + """ + return self.process( + process_id="flatten_dimensions", + arguments=dict_no_none( + data=THIS, + dimensions=dimensions, + target_dimension=target_dimension, + label_separator=label_separator, + ), + ) + + @openeo_process + def unflatten_dimension(self, dimension: str, target_dimensions: List[str], label_separator: Optional[str] = None): + """ + Splits a single dimension into multiple dimensions by systematically extracting values and splitting + the dimension labels by the given `label_separator`. + This process is the opposite of the process :py:meth:`flatten_dimensions()` but executing both processes + subsequently doesn't necessarily create a data cube that is equal to the original data cube. + + :param dimension: The name of the dimension to split. + :param target_dimensions: The names of the target dimensions. + :param label_separator: The string that will be used as a separator to split the dimension labels. + :return: A data cube with the new shape. + + .. warning:: experimental process: not generally supported, API subject to change. + .. 
versionadded:: 0.10.0 + """ + return self.process( + process_id="unflatten_dimension", + arguments=dict_no_none( + data=THIS, + dimension=dimension, + target_dimensions=target_dimensions, + label_separator=label_separator, + ), + ) diff --git a/lib/openeo/rest/graph_building.py b/lib/openeo/rest/graph_building.py new file mode 100644 index 000000000..d05eae930 --- /dev/null +++ b/lib/openeo/rest/graph_building.py @@ -0,0 +1,78 @@ +""" +Public openEO process graph building utilities +''''''''''''''''''''''''''''''''''''''''''''''' + +""" +from __future__ import annotations + +from typing import Optional + +from openeo.internal.graph_building import PGNode, _FromNodeMixin +from openeo.processes import ProcessBuilder + + +class CollectionProperty(_FromNodeMixin): + """ + Helper object to easily create simple collection metadata property filters + to be used with :py:meth:`Connection.load_collection() `. + + .. note:: This class should not be used directly by end user code. + Use the :py:func:`~openeo.rest.graph_building.collection_property` factory instead. + + .. warning:: this is an experimental feature, naming might change. 
+ """ + + def __init__(self, name: str, _builder: Optional[ProcessBuilder] = None): + self.name = name + self._builder = _builder or ProcessBuilder(pgnode={"from_parameter": "value"}) + + def from_node(self) -> PGNode: + return self._builder.from_node() + + def __eq__(self, other) -> CollectionProperty: + return CollectionProperty(self.name, _builder=self._builder == other) + + def __ne__(self, other) -> CollectionProperty: + return CollectionProperty(self.name, _builder=self._builder != other) + + def __gt__(self, other) -> CollectionProperty: + return CollectionProperty(self.name, _builder=self._builder > other) + + def __ge__(self, other) -> CollectionProperty: + return CollectionProperty(self.name, _builder=self._builder >= other) + + def __lt__(self, other) -> CollectionProperty: + return CollectionProperty(self.name, _builder=self._builder < other) + + def __le__(self, other) -> CollectionProperty: + return CollectionProperty(self.name, _builder=self._builder <= other) + + +def collection_property(name: str) -> CollectionProperty: + """ + Helper to easily create simple collection metadata property filters + to be used with :py:meth:`Connection.load_collection() `. + + Usage example: + + .. code-block:: python + + from openeo import collection_property + ... + + connection.load_collection( + ... + properties=[ + collection_property("eo:cloud_cover") <= 75, + collection_property("platform") == "Sentinel-2B", + ] + ) + + .. warning:: this is an experimental feature, naming might change. + + .. versionadded:: 0.26.0 + + :param name: name of the collection property to filter on + :return: an object that supports operators like ``<=``, ``==`` to easily build simple property filters. 
+ """ + return CollectionProperty(name=name) diff --git a/lib/openeo/rest/job.py b/lib/openeo/rest/job.py new file mode 100644 index 000000000..e3f307a71 --- /dev/null +++ b/lib/openeo/rest/job.py @@ -0,0 +1,546 @@ +from __future__ import annotations + +import datetime +import json +import logging +import time +import typing +from pathlib import Path +from typing import Dict, List, Optional, Union + +import requests + +from openeo.api.logs import LogEntry, log_level_name, normalize_log_level +from openeo.internal.documentation import openeo_endpoint +from openeo.internal.jupyter import ( + VisualDict, + VisualList, + render_component, + render_error, +) +from openeo.internal.warnings import deprecated, legacy_alias +from openeo.rest import ( + DEFAULT_DOWNLOAD_CHUNK_SIZE, + JobFailedException, + OpenEoApiError, + OpenEoApiPlainError, + OpenEoClientException, +) +from openeo.util import ensure_dir + +if typing.TYPE_CHECKING: + # Imports for type checking only (circular import issue at runtime). + from openeo.rest.connection import Connection + +logger = logging.getLogger(__name__) + + +DEFAULT_JOB_RESULTS_FILENAME = "job-results.json" + + +class BatchJob: + """ + Handle for an openEO batch job, allowing it to describe, start, cancel, inspect results, etc. + + .. versionadded:: 0.11.0 + This class originally had the more cryptic name :py:class:`RESTJob`, + which is still available as legacy alias, + but :py:class:`BatchJob` is recommended since version 0.11.0. 
+ + """ + + # TODO #425 method to bootstrap `load_stac` directly from a BatchJob object + + def __init__(self, job_id: str, connection: Connection): + self.job_id = job_id + """Unique identifier of the batch job (string).""" + + self.connection = connection + + def __repr__(self): + return '<{c} job_id={i!r}>'.format(c=self.__class__.__name__, i=self.job_id) + + def _repr_html_(self): + data = self.describe() + currency = self.connection.capabilities().currency() + return render_component('job', data=data, parameters={'currency': currency}) + + @openeo_endpoint("GET /jobs/{job_id}") + def describe(self) -> dict: + """ + Get detailed metadata about a submitted batch job + (title, process graph, status, progress, ...). + + .. versionadded:: 0.20.0 + This method was previously called :py:meth:`describe_job`. + """ + return self.connection.get(f"/jobs/{self.job_id}", expected_status=200).json() + + describe_job = legacy_alias(describe, name="describe_job", since="0.20.0", mode="soft") + + def status(self) -> str: + """ + Get the status of the batch job + + :return: batch job status, one of "created", "queued", "running", "canceled", "finished" or "error". + """ + return self.describe().get("status", "N/A") + + @openeo_endpoint("DELETE /jobs/{job_id}") + def delete(self): + """ + Delete this batch job. + + .. versionadded:: 0.20.0 + This method was previously called :py:meth:`delete_job`. 
+ """ + self.connection.delete(f"/jobs/{self.job_id}", expected_status=204) + + delete_job = legacy_alias(delete, name="delete_job", since="0.20.0", mode="soft") + + @openeo_endpoint("GET /jobs/{job_id}/estimate") + def estimate(self): + """Calculate time/cost estimate for a job.""" + data = self.connection.get( + f"/jobs/{self.job_id}/estimate", expected_status=200 + ).json() + currency = self.connection.capabilities().currency() + return VisualDict('job-estimate', data=data, parameters={'currency': currency}) + + estimate_job = legacy_alias(estimate, name="estimate_job", since="0.20.0", mode="soft") + + @openeo_endpoint("POST /jobs/{job_id}/results") + def start(self) -> BatchJob: + """ + Start this batch job. + + :return: Started batch job + + .. versionadded:: 0.20.0 + This method was previously called :py:meth:`start_job`. + """ + self.connection.post(f"/jobs/{self.job_id}/results", expected_status=202) + return self + + start_job = legacy_alias(start, name="start_job", since="0.20.0", mode="soft") + + @openeo_endpoint("DELETE /jobs/{job_id}/results") + def stop(self): + """ + Stop this batch job. + + .. versionadded:: 0.20.0 + This method was previously called :py:meth:`stop_job`. + """ + self.connection.delete(f"/jobs/{self.job_id}/results", expected_status=204) + + stop_job = legacy_alias(stop, name="stop_job", since="0.20.0", mode="soft") + + def get_results_metadata_url(self, *, full: bool = False) -> str: + """Get results metadata URL""" + url = f"/jobs/{self.job_id}/results" + if full: + url = self.connection.build_url(url) + return url + + @deprecated("Use :py:meth:`~BatchJob.get_results` instead.", version="0.4.10") + def list_results(self) -> dict: + """Get batch job results metadata.""" + return self.get_results().get_metadata() + + def download_result(self, target: Union[str, Path] = None) -> Path: + """ + Download single job result to the target file path or into folder (current working dir by default). 
+ + Fails if there are multiple result files. + + :param target: String or path where the file should be downloaded to. + """ + return self.get_results().download_file(target=target) + + @deprecated( + "Instead use :py:meth:`BatchJob.get_results` and the more flexible download functionality of :py:class:`JobResults`", + version="0.4.10") + def download_results(self, target: Union[str, Path] = None) -> Dict[Path, dict]: + """ + Download all job result files into given folder (current working dir by default). + + The names of the files are taken directly from the backend. + + :param target: String/path, folder where to put the result files. + :return: file_list: Dict containing the downloaded file path as value and asset metadata + """ + return self.get_result().download_files(target) + + @deprecated("Use :py:meth:`BatchJob.get_results` instead.", version="0.4.10") + def get_result(self): + return _Result(self) + + def get_results(self) -> JobResults: + """ + Get handle to batch job results for result metadata inspection or downloading resulting assets. + + .. versionadded:: 0.4.10 + """ + return JobResults(job=self) + + def logs( + self, offset: Optional[str] = None, level: Optional[Union[str, int]] = None + ) -> List[LogEntry]: + """Retrieve job logs. + + :param offset: The last identifier (property ``id`` of a LogEntry) the client has received. + + If provided, the back-ends only sends the entries that occurred after the specified identifier. + If not provided or empty, start with the first entry. + + Defaults to None. + + :param level: Minimum log level to retrieve. + + You can use either constants from Python's standard module ``logging`` + or their names (case-insensitive). + + For example: + ``logging.INFO``, ``"info"`` or ``"INFO"`` can all be used to show the messages + for level ``logging.INFO`` and above, i.e. also ``logging.WARNING`` and + ``logging.ERROR`` will be included. + + Default is to show all log levels, in other words ``logging.DEBUG``. 
+ This is also the result when you explicitly pass log_level=None or log_level="". + + :return: A list containing the log entries for the batch job. + """ + url = f"/jobs/{self.job_id}/logs" + params = {} + if offset is not None: + params["offset"] = offset + if level is not None: + params["level"] = log_level_name(level) + response = self.connection.get(url, params=params, expected_status=200) + logs = response.json()["logs"] + + # Only filter logs when specified. + # We should still support client-side log_level filtering because not all backends + # support the minimum log level parameter. + if level is not None: + log_level = normalize_log_level(level) + logs = ( + log + for log in logs + if normalize_log_level(log.get("level")) >= log_level + ) + + entries = [LogEntry(log) for log in logs] + return VisualList("logs", data=entries) + + def run_synchronous( + self, outputfile: Union[str, Path, None] = None, + print=print, max_poll_interval=60, connection_retry_interval=30 + ) -> BatchJob: + """Start the job, wait for it to finish and download result""" + self.start_and_wait( + print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval + ) + # TODO #135 support multi file result sets too? + if outputfile is not None: + self.download_result(outputfile) + return self + + def start_and_wait( + self, print=print, max_poll_interval: int = 60, connection_retry_interval: int = 30, soft_error_max=10 + ) -> BatchJob: + """ + Start the batch job, poll its status and wait till it finishes (or fails) + + :param print: print/logging function to show progress/status + :param max_poll_interval: maximum number of seconds to sleep between status polls + :param connection_retry_interval: how long to wait when status poll failed due to connection issue + :param soft_error_max: maximum number of soft errors (e.g. temporary connection glitches) to allow + :return: + """ + # TODO rename `connection_retry_interval` to something more generic? 
+ start_time = time.time() + + def elapsed() -> str: + return str(datetime.timedelta(seconds=time.time() - start_time)).rsplit(".")[0] + + def print_status(msg: str): + print("{t} Job {i!r}: {m}".format(t=elapsed(), i=self.job_id, m=msg)) + + # TODO: make `max_poll_interval`, `connection_retry_interval` class constants or instance properties? + print_status("send 'start'") + self.start() + + # TODO: also add `wait` method so you can track a job that already has started explicitly + # or just rename this method to `wait` and automatically do start if not started yet? + + # Start with fast polling. + poll_interval = min(5, max_poll_interval) + status = None + _soft_error_count = 0 + + def soft_error(message: str): + """Non breaking error (unless we had too much of them)""" + nonlocal _soft_error_count + _soft_error_count += 1 + if _soft_error_count > soft_error_max: + raise OpenEoClientException("Excessive soft errors") + print_status(message) + time.sleep(connection_retry_interval) + + while True: + # TODO: also allow a hard time limit on this infinite poll loop? + try: + job_info = self.describe() + except requests.ConnectionError as e: + soft_error("Connection error while polling job status: {e}".format(e=e)) + continue + except OpenEoApiPlainError as e: + if e.http_status_code in [502, 503]: + soft_error("Service availability error while polling job status: {e}".format(e=e)) + continue + else: + raise + + status = job_info.get("status", "N/A") + progress = '{p}%'.format(p=job_info["progress"]) if "progress" in job_info else "N/A" + print_status("{s} (progress {p})".format(s=status, p=progress)) + if status not in ('submitted', 'created', 'queued', 'running'): + break + + # Sleep for next poll (and adaptively make polling less frequent) + time.sleep(poll_interval) + poll_interval = min(1.25 * poll_interval, max_poll_interval) + + if status != "finished": + # TODO: allow to disable this printing logs (e.g. in non-interactive contexts)? 
+ # TODO: render logs jupyter-aware in a notebook context? + print(f"Your batch job {self.job_id!r} failed. Error logs:") + print(self.logs(level=logging.ERROR)) + print( + f"Full logs can be inspected in an openEO (web) editor or with `connection.job({self.job_id!r}).logs()`." + ) + raise JobFailedException( + f"Batch job {self.job_id!r} didn't finish successfully. Status: {status} (after {elapsed()}).", + job=self, + ) + + return self + + +@deprecated(reason="Use :py:class:`BatchJob` instead", version="0.11.0") +class RESTJob(BatchJob): + """ + Legacy alias for :py:class:`BatchJob`. + """ + + +class ResultAsset: + """ + Result asset of a batch job (e.g. a GeoTIFF or JSON file) + + .. versionadded:: 0.4.10 + """ + + def __init__(self, job: BatchJob, name: str, href: str, metadata: dict): + self.job = job + + self.name = name + """Asset name as advertised by the backend.""" + + self.href = href + """Download URL of the asset.""" + + self.metadata = metadata + """Asset metadata provided by the backend, possibly containing keys "type" (for media type), "roles", "title", "description".""" + + def __repr__(self): + return "".format( + n=self.name, t=self.metadata.get("type", "unknown"), h=self.href + ) + + def download( + self, target: Optional[Union[Path, str]] = None, *, chunk_size: int = DEFAULT_DOWNLOAD_CHUNK_SIZE + ) -> Path: + """ + Download asset to given location + + :param target: download target path. Can be an existing folder + (in which case the filename advertised by backend will be used) + or full file name. By default, the working directory will be used. + :param chunk_size: chunk size for streaming response. 
+ """ + target = Path(target or Path.cwd()) + if target.is_dir(): + target = target / self.name + ensure_dir(target.parent) + logger.info("Downloading Job result asset {n!r} from {h!s} to {t!s}".format(n=self.name, h=self.href, t=target)) + response = self._get_response(stream=True) + with target.open("wb") as f: + for block in response.iter_content(chunk_size=chunk_size): + f.write(block) + return target + + def _get_response(self, stream=True) -> requests.Response: + return self.job.connection.get(self.href, stream=stream) + + def load_json(self) -> dict: + """Load asset in memory and parse as JSON.""" + if not (self.name.lower().endswith(".json") or self.metadata.get("type") == "application/json"): + logger.warning("Asset might not be JSON") + return self._get_response().json() + + def load_bytes(self) -> bytes: + """Load asset in memory as raw bytes.""" + return self._get_response().content + + # TODO: more `load` methods e.g.: load GTiff asset directly as numpy array + + +class MultipleAssetException(OpenEoClientException): + pass + + +class JobResults: + """ + Results of a batch job: listing of one or more output files (assets) + and some metadata. + + .. versionadded:: 0.4.10 + """ + + def __init__(self, job: BatchJob): + self._job = job + self._results = None + + def __repr__(self): + return "".format(j=self._job.job_id) + + def get_job_id(self) -> str: + return self._job.job_id + + def _repr_html_(self): + try: + response = self.get_metadata() + return render_component("batch-job-result", data = response) + except OpenEoApiError as error: + return render_error(error) + + def get_metadata(self, force=False) -> dict: + """Get batch job results metadata (parsed JSON)""" + if self._results is None or force: + self._results = self._job.connection.get( + self._job.get_results_metadata_url(), expected_status=200 + ).json() + return self._results + + # TODO: provide methods for `stac_version`, `id`, `geometry`, `properties`, `links`, ...? 
+ + def get_assets(self) -> List[ResultAsset]: + """ + Get all assets from the job results. + """ + # TODO: add arguments to filter on metadata, e.g. to only get assets of type "image/tiff" + metadata = self.get_metadata() + # API 1.0 style: dictionary mapping filenames to metadata dict (with at least a "href" field) + assets = metadata.get("assets", {}) + if not assets: + logger.warning("No assets found in job result metadata.") + return [ + ResultAsset(job=self._job, name=name, href=asset["href"], metadata=asset) + for name, asset in assets.items() + ] + + def get_asset(self, name: str = None) -> ResultAsset: + """ + Get single asset by name or without name if there is only one. + """ + # TODO: also support getting a single asset by type or role? + assets = self.get_assets() + if len(assets) == 0: + raise OpenEoClientException("No assets in result.") + if name is None: + if len(assets) == 1: + return assets[0] + else: + raise MultipleAssetException("Multiple result assets for job {j}: {a}".format( + j=self._job.job_id, a=[a.name for a in assets] + )) + else: + try: + return next(a for a in assets if a.name == name) + except StopIteration: + raise OpenEoClientException( + "No asset {n!r} in: {a}".format(n=name, a=[a.name for a in assets]) + ) + + def download_file(self, target: Union[Path, str] = None, name: str = None) -> Path: + """ + Download single asset. Can be used when there is only one asset in the + :py:class:`JobResults`, or when the desired asset name is given explicitly. + + :param target: path to download to. Can be an existing directory + (in which case the filename advertised by backend will be used) + or full file name. By default, the working directory will be used. 
+ :param name: asset name to download (not required when there is only one asset) + :return: path of downloaded asset + """ + try: + return self.get_asset(name=name).download(target=target) + except MultipleAssetException: + raise OpenEoClientException( + "Can not use `download_file` with multiple assets. Use `download_files` instead.") + + def download_files(self, target: Union[Path, str] = None, include_stac_metadata: bool = True) -> List[Path]: + """ + Download all assets to given folder. + + :param target: path to folder to download to (must be a folder if it already exists) + :param include_stac_metadata: whether to download the job result metadata as a STAC (JSON) file. + :return: list of paths to the downloaded assets. + """ + target = Path(target or Path.cwd()) + if target.exists() and not target.is_dir(): + raise OpenEoClientException(f"Target argument {target} exists but isn't a folder.") + ensure_dir(target) + + downloaded = [a.download(target) for a in self.get_assets()] + + if include_stac_metadata: + # TODO #184: convention for metadata file name? + metadata_file = target / DEFAULT_JOB_RESULTS_FILENAME + # TODO #184: rewrite references to locally downloaded assets? + metadata_file.write_text(json.dumps(self.get_metadata())) + downloaded.append(metadata_file) + + return downloaded + + +@deprecated(reason="Use :py:class:`JobResults` instead", version="0.4.10") +class _Result: + """ + Wrapper around `JobResults` to adapt old deprecated "Result" API. + + .. 
deprecated:: 0.4.10 + """ + + # TODO: deprecated: remove this + + def __init__(self, job): + self.results = JobResults(job=job) + + def download_file(self, target: Union[str, Path] = None) -> Path: + return self.results.download_file(target=target) + + def download_files(self, target: Union[str, Path] = None) -> Dict[Path, dict]: + target = Path(target or Path.cwd()) + if target.exists() and not target.is_dir(): + raise OpenEoClientException(f"Target argument {target} exists but isn't a folder.") + return {a.download(target): a.metadata for a in self.results.get_assets()} + + def load_json(self) -> dict: + return self.results.get_asset().load_json() + + def load_bytes(self) -> bytes: + return self.results.get_asset().load_bytes() diff --git a/lib/openeo/rest/mlmodel.py b/lib/openeo/rest/mlmodel.py new file mode 100644 index 000000000..1220a7701 --- /dev/null +++ b/lib/openeo/rest/mlmodel.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +import logging +import pathlib +import typing +from typing import Optional, Union + +from openeo.internal.documentation import openeo_process +from openeo.internal.graph_building import PGNode +from openeo.rest._datacube import _ProcessGraphAbstraction +from openeo.rest.job import BatchJob + +if typing.TYPE_CHECKING: + # Imports for type checking only (circular import issue at runtime). + from openeo import Connection + +_log = logging.getLogger(__name__) + + +class MlModel(_ProcessGraphAbstraction): + """ + A machine learning model. + + It is the result of a training procedure, e.g. output of a ``fit_...`` process, + and can be used for prediction (classification or regression) with the corresponding ``predict_...`` process. + + .. versionadded:: 0.10.0 + """ + + def __init__(self, graph: PGNode, connection: Union[Connection, None]): + super().__init__(pgnode=graph, connection=connection) + + def save_ml_model(self, options: Optional[dict] = None): + """ + Saves a machine learning model as part of a batch job. 
+ + :param options: Additional parameters to create the file(s). + """ + pgnode = PGNode( + process_id="save_ml_model", + arguments={"data": self, "options": options or {}} + ) + return MlModel(graph=pgnode, connection=self._connection) + + @staticmethod + @openeo_process + def load_ml_model(connection: Connection, id: Union[str, BatchJob]) -> MlModel: + """ + Loads a machine learning model from a STAC Item. + + :param connection: connection object + :param id: STAC item reference, as URL, batch job (id) or user-uploaded file + :return: + + .. versionadded:: 0.10.0 + """ + if isinstance(id, BatchJob): + id = id.job_id + return MlModel(graph=PGNode(process_id="load_ml_model", id=id), connection=connection) + + def execute_batch( + self, + outputfile: Union[str, pathlib.Path], + *, + title: Optional[str] = None, + description: Optional[str] = None, + plan: Optional[str] = None, + budget: Optional[float] = None, + print=print, + max_poll_interval=60, + connection_retry_interval=30, + additional: Optional[dict] = None, + job_options: Optional[dict] = None, + ) -> BatchJob: + """ + Evaluate the process graph by creating a batch job, and retrieving the results when it is finished. + This method is mostly recommended if the batch job is expected to run in a reasonable amount of time. + + For very long running jobs, you probably do not want to keep the client running. + + :param job_options: + :param outputfile: The path of a file to which a result can be written + :param out_format: (optional) Format of the job result. + :param format_options: String Parameters for the job result format + :param additional: additional (top-level) properties to set in the request body + :param job_options: dictionary of job options to pass to the backend + (under top-level property "job_options") + + .. versionadded:: 0.36.0 + Added argument ``additional``. 
+ """ + job = self.create_job( + title=title, + description=description, + plan=plan, + budget=budget, + additional=additional, + job_options=job_options, + ) + return job.run_synchronous( + # TODO #135 support multi file result sets too + outputfile=outputfile, + print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval + ) + + def create_job( + self, + *, + title: Optional[str] = None, + description: Optional[str] = None, + plan: Optional[str] = None, + budget: Optional[float] = None, + additional: Optional[dict] = None, + job_options: Optional[dict] = None, + ) -> BatchJob: + """ + Sends a job to the backend and returns a ClientJob instance. + + :param title: job title + :param description: job description + :param plan: The billing plan to process and charge the job with + :param budget: Maximum budget to be spent on executing the job. + Note that some backends do not honor this limit. + :param additional: additional (top-level) properties to set in the request body + :param job_options: dictionary of job options to pass to the backend + (under top-level property "job_options") + :param format_options: String Parameters for the job result format + :return: Created job. + + .. versionadded:: 0.36.0 + Added argument ``additional``. + """ + # TODO: centralize `create_job` for `DataCube`, `VectorCube`, `MlModel`, ... + pg = self + if pg.result_node().process_id not in {"save_ml_model"}: + _log.warning("Process graph has no final `save_ml_model`. 
Adding it automatically.") + pg = pg.save_ml_model() + return self._connection.create_job( + process_graph=pg.flat_graph(), + title=title, + description=description, + plan=plan, + budget=budget, + additional=additional, + job_options=job_options, + ) diff --git a/lib/openeo/rest/multiresult.py b/lib/openeo/rest/multiresult.py new file mode 100644 index 000000000..82f10fb5c --- /dev/null +++ b/lib/openeo/rest/multiresult.py @@ -0,0 +1,111 @@ +from __future__ import annotations + +from typing import Dict, List, Optional + +from openeo import BatchJob +from openeo.internal.graph_building import FlatGraphableMixin, MultiLeafGraph +from openeo.rest import OpenEoClientException +from openeo.rest.connection import Connection, extract_connections + + +class MultiResult(FlatGraphableMixin): + """ + Helper to create and run batch jobs with process graphs + that contain multiple result nodes + or, more generally speaking, multiple process graph "leaf" nodes. + + Provide multiple + :py:class:`~openeo.rest.datacube.DataCube`/:py:class:`~openeo.rest.vectorcube.VectorCube` + instances to the constructor, + and start a batch job from that, + for example as follows: + + .. code-block:: python + + from openeo import MultiResult + + cube1 = ... + cube2 = ... + multi_result = MultiResult([cube1, cube2]) + job = multi_result.create_job() + + .. seealso:: + + :ref:`multi-result-process-graphs` + + .. versionadded:: 0.35.0 + """ + + __slots__ = ("_multi_leaf_graph", "_connection") + + def __init__(self, leaves: List[FlatGraphableMixin], connection: Optional[Connection] = None): + """ + Build a :py:class:`MultiResult` instance from multiple leaf nodes + + :param leaves: list of objects that can be + converted to an openEO-style (flat) process graph representation, + typically :py:class:`~openeo.rest.datacube.DataCube` + or :py:class:`~openeo.rest.vectorcube.VectorCube` instances. 
+ :param connection: Optional connection to use for creating/starting batch jobs, + for special use cases where the provided leaf instances + are not already associated with a connection. + """ + self._multi_leaf_graph = MultiLeafGraph(leaves=leaves) + self._connection = self._extract_connection(leaves=leaves, connection=connection) + + @staticmethod + def _extract_connection(leaves: List[FlatGraphableMixin], connection: Optional[Connection] = None) -> Connection: + """ + Extract common connection from leaves and/or explicitly provided connection. + Fails if there are multiple or none. + """ + connections = set() + if connection: + connections.add(connection) + connections.update(extract_connections(leaves)) + + if len(connections) == 1: + return connections.pop() + elif len(connections) == 0: + raise OpenEoClientException("No connection in any of the MultiResult leaves") + else: + raise OpenEoClientException("MultiResult with multiple different connections") + + def flat_graph(self) -> Dict[str, dict]: + return self._multi_leaf_graph.flat_graph() + + def create_job( + self, + *, + title: Optional[str] = None, + description: Optional[str] = None, + additional: Optional[dict] = None, + job_options: Optional[dict] = None, + validate: Optional[bool] = None, + ) -> BatchJob: + return self._connection.create_job( + process_graph=self._multi_leaf_graph, + title=title, + description=description, + additional=additional, + job_options=job_options, + validate=validate, + ) + + def execute_batch( + self, + *, + title: Optional[str] = None, + description: Optional[str] = None, + additional: Optional[dict] = None, + job_options: Optional[dict] = None, + validate: Optional[bool] = None, + ) -> BatchJob: + job = self.create_job( + title=title, + description=description, + additional=additional, + job_options=job_options, + validate=validate, + ) + return job.run_synchronous() diff --git a/lib/openeo/rest/rest_capabilities.py b/lib/openeo/rest/rest_capabilities.py new file mode 
100644 index 000000000..00922c261 --- /dev/null +++ b/lib/openeo/rest/rest_capabilities.py @@ -0,0 +1,54 @@ +from typing import List, Optional + +from openeo.capabilities import Capabilities +from openeo.internal.jupyter import render_component +from openeo.util import deep_get + + +class RESTCapabilities(Capabilities): + """Represents REST capabilities of a connection / back end.""" + + def __init__(self, data: dict, url: str = None): + super(RESTCapabilities, self).__init__(data) + self.capabilities = data + self.url = url + + def get(self, key: str, default=None): + return self.capabilities.get(key, default) + + def deep_get(self, *keys, default=None): + return deep_get(self.capabilities, *keys, default=default) + + def api_version(self) -> str: + """ Get openEO version.""" + if 'api_version' in self.capabilities: + return self.capabilities.get('api_version') + else: + # Legacy/deprecated + return self.capabilities.get('version') + + def list_features(self): + """ List all supported features / endpoints.""" + return self.capabilities.get('endpoints') + + def has_features(self, method_name): + """ Check whether a feature / endpoint is supported.""" + # Field: endpoints > ... 
TODO + pass + + def supports_endpoint(self, path: str, method="GET"): + return any( + endpoint.get("path") == path and method.upper() in endpoint.get("methods", []) + for endpoint in self.capabilities.get("endpoints", []) + ) + + def currency(self) -> Optional[str]: + """Get default billing currency.""" + return self.deep_get("billing", "currency", default=None) + + def list_plans(self) -> List[dict]: + """List all billing plans.""" + return self.deep_get("billing", "plans", default=[]) + + def _repr_html_(self): + return render_component("capabilities", data = self.capabilities, parameters = {"url": self.url}) diff --git a/lib/openeo/rest/service.py b/lib/openeo/rest/service.py new file mode 100644 index 000000000..a12383695 --- /dev/null +++ b/lib/openeo/rest/service.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +import typing +from typing import List, Optional, Union + +from openeo.api.logs import LogEntry, log_level_name +from openeo.internal.jupyter import VisualDict, VisualList + +if typing.TYPE_CHECKING: + # Imports for type checking only (circular import issue at runtime). 
+ from openeo.rest.connection import Connection + + +class Service: + """Represents a secondary web service in openeo.""" + + def __init__(self, service_id: str, connection: Connection): + # Unique identifier of the secondary web service (string) + self.service_id = service_id + self.connection = connection + + def __repr__(self): + return '<{c} service_id={i!r}>'.format(c=self.__class__.__name__, i=self.service_id) + + def _repr_html_(self): + data = self.describe_service() + currency = self.connection.capabilities().currency() + return VisualDict('service', data = data, parameters = {'currency': currency}) + + def describe_service(self): + """ Get all information about a secondary web service.""" + # GET /services/{service_id} + return self.connection.get("/services/{}".format(self.service_id), expected_status=200).json() + + def update_service(self, process_graph=None, title=None, description=None, enabled=None, configuration=None, plan=None, budget=None, additional=None): + """ Update a secondary web service.""" + # PATCH /services/{service_id} + raise NotImplementedError + + def delete_service(self): + """ Delete a secondary web service.""" + # DELETE /services/{service_id} + self.connection.delete("/services/{}".format(self.service_id), expected_status=204) + + def logs( + self, offset: Optional[str] = None, level: Optional[Union[str, int]] = None + ) -> List[LogEntry]: + """Retrieve service logs.""" + url = f"/service/{self.service_id}/logs" + params = {} + if offset is not None: + params["offset"] = offset + if level is not None: + params["level"] = log_level_name(level) + resp = self.connection.get(url, params=params, expected_status=200) + logs = resp.json()["logs"] + entries = [LogEntry(log) for log in logs] + return VisualList("logs", data=entries) diff --git a/lib/openeo/rest/udp.py b/lib/openeo/rest/udp.py new file mode 100644 index 000000000..0df9015ab --- /dev/null +++ b/lib/openeo/rest/udp.py @@ -0,0 +1,124 @@ +from __future__ import annotations + 
+import typing +from pathlib import Path +from typing import List, Optional, Union + +from openeo.api.process import Parameter +from openeo.internal.graph_building import FlatGraphableMixin, as_flat_graph +from openeo.internal.jupyter import render_component +from openeo.internal.processes.builder import ProcessBuilderBase +from openeo.internal.warnings import deprecated +from openeo.util import dict_no_none + +if typing.TYPE_CHECKING: + # Imports for type checking only (circular import issue at runtime). + from openeo.rest.connection import Connection + + +def build_process_dict( + process_graph: Union[dict, FlatGraphableMixin, Path, List[FlatGraphableMixin]], + process_id: Optional[str] = None, + summary: Optional[str] = None, + description: Optional[str] = None, + parameters: Optional[List[Union[Parameter, dict]]] = None, + returns: Optional[dict] = None, + categories: Optional[List[str]] = None, + examples: Optional[List[dict]] = None, + links: Optional[List[dict]] = None, +) -> dict: + """ + Build a dictionary describing a process with metadaa (`process_graph`, `parameters`, `description`, ...) + + :param process_graph: dict or builder representing a process graph + :param process_id: identifier of the process + :param summary: short summary of what the process does + :param description: detailed description + :param parameters: list of process parameters (which have name, schema, default value, ...) 
+ :param returns: description and schema of what the process returns + :param categories: list of categories + :param examples: list of examples, may be used for unit tests + :param links: list of links related to the process + :return: dictionary in openEO "process graph with metadata" format + """ + process = dict_no_none( + process_graph=as_flat_graph(process_graph), + id=process_id, + summary=summary, + description=description, + returns=returns, + categories=categories, + examples=examples, + links=links + ) + if parameters is not None: + process["parameters"] = [ + (p if isinstance(p, Parameter) else Parameter(**p)).to_dict() + for p in parameters + ] + return process + + +class RESTUserDefinedProcess: + """ + Wrapper for a user-defined process stored (or to be stored) on an openEO back-end + """ + + def __init__(self, user_defined_process_id: str, connection: Connection): + self.user_defined_process_id = user_defined_process_id + self._connection = connection + self._connection.assert_user_defined_process_support() + + def _repr_html_(self): + process = self.describe() + return render_component('process', data=process, parameters = {'show-graph': True, 'provide-download': False}) + + def store( + self, + process_graph: Union[dict, FlatGraphableMixin], + parameters: Optional[List[Union[Parameter, dict]]] = None, + public: bool = False, + summary: Optional[str] = None, + description: Optional[str] = None, + returns: Optional[dict] = None, + categories: Optional[List[str]] = None, + examples: Optional[List[dict]] = None, + links: Optional[List[dict]] = None, + ): + """Store a process graph and its metadata on the backend as a user-defined process""" + process = build_process_dict( + process_graph=process_graph, parameters=parameters, + summary=summary, description=description, returns=returns, + categories=categories, examples=examples, links=links, + ) + + # TODO: this "public" flag is not standardized yet EP-3609, 
https://github.com/Open-EO/openeo-api/issues/310 + process["public"] = public + + self._connection._preflight_validation(pg_with_metadata={"process": process}) + self._connection.put( + path="/process_graphs/{}".format(self.user_defined_process_id), json=process, expected_status=200 + ) + + @deprecated( + "Use `store` instead. Method `update` is misleading: OpenEO API does not provide (partial) updates" + " of user-defined processes, only fully overwriting 'store' operations.", + version="0.4.11") + def update( + self, process_graph: Union[dict, ProcessBuilderBase], parameters: List[Union[Parameter, dict]] = None, + public: bool = False, summary: str = None, description: str = None + ): + self.store(process_graph=process_graph, parameters=parameters, public=public, summary=summary, + description=description) + + def describe(self) -> dict: + """Get metadata of this user-defined process.""" + # TODO: parse the "parameters" to Parameter objects? + return self._connection.get(path="/process_graphs/{}".format(self.user_defined_process_id)).json() + + def delete(self) -> None: + """Remove user-defined process from back-end""" + self._connection.delete(path="/process_graphs/{}".format(self.user_defined_process_id), expected_status=204) + + def validate(self) -> None: + raise NotImplementedError diff --git a/lib/openeo/rest/userfile.py b/lib/openeo/rest/userfile.py new file mode 100644 index 000000000..5e0e94e03 --- /dev/null +++ b/lib/openeo/rest/userfile.py @@ -0,0 +1,100 @@ +from __future__ import annotations + +import typing +from pathlib import Path, PurePosixPath +from typing import Any, Dict, Optional, Union + +from openeo.rest import DEFAULT_DOWNLOAD_CHUNK_SIZE +from openeo.util import ensure_dir + +if typing.TYPE_CHECKING: + # Imports for type checking only (circular import issue at runtime). + from openeo.rest.connection import Connection + + +class UserFile: + """ + Handle to a (user-uploaded) file in the user workspace on a openEO back-end. 
+ """ + + def __init__( + self, + path: Union[str, PurePosixPath, None], + *, + connection: Connection, + metadata: Optional[dict] = None, + ): + if path: + pass + elif metadata and metadata.get("path"): + path = metadata.get("path") + else: + raise ValueError( + "File path should be specified through `path` or `metadata` argument." + ) + + self.path = PurePosixPath(path) + self.metadata = metadata or {"path": path} + self.connection = connection + + @classmethod + def from_metadata(cls, metadata: dict, connection: Connection) -> UserFile: + """Build :py:class:`UserFile` from a workspace file metadata dictionary.""" + return cls(path=None, connection=connection, metadata=metadata) + + def __repr__(self): + return "<{c} file={i!r}>".format(c=self.__class__.__name__, i=self.path) + + def _get_endpoint(self) -> str: + return f"/files/{self.path!s}" + + def download(self, target: Union[Path, str] = None) -> Path: + """ + Downloads a user-uploaded file from the user workspace on the back-end + locally to the given location. + + :param target: local download target path. Can be an existing folder + (in which case the file name advertised by backend will be used) + or full file name. By default, the working directory will be used. + """ + response = self.connection.get( + self._get_endpoint(), expected_status=200, stream=True + ) + + target = Path(target or Path.cwd()) + if target.is_dir(): + target = target / self.path.name + ensure_dir(target.parent) + + with target.open(mode="wb") as f: + for chunk in response.iter_content(chunk_size=DEFAULT_DOWNLOAD_CHUNK_SIZE): + f.write(chunk) + + return target + + def upload(self, source: Union[Path, str]) -> UserFile: + """ + Uploads a local file to the path corresponding to this :py:class:`UserFile` in the user workspace + and returns new :py:class:`UserFile` of newly uploaded file. + + .. tip:: + Usually you'll just need + :py:meth:`Connection.upload_file() ` + instead of this :py:class:`UserFile` method. 
+ + If the file exists in the user workspace it will be replaced. + + :param source: A path to a file on the local file system to upload. + :return: new :py:class:`UserFile` instance of the newly uploaded file + """ + return self.connection.upload_file(source, target=self.path) + + def delete(self): + """Delete the user-uploaded file from the user workspace on the back-end.""" + self.connection.delete(self._get_endpoint(), expected_status=204) + + def to_dict(self) -> Dict[str, Any]: + """Returns the provided metadata as dict.""" + # This is used in internal/jupyter.py to detect and get the original metadata. + # TODO: make this more explicit with an internal API? + return self.metadata diff --git a/lib/openeo/rest/vectorcube.py b/lib/openeo/rest/vectorcube.py new file mode 100644 index 000000000..51c2bf69e --- /dev/null +++ b/lib/openeo/rest/vectorcube.py @@ -0,0 +1,621 @@ +from __future__ import annotations + +import json +import pathlib +import typing +from typing import Callable, List, Optional, Tuple, Union + +import shapely.geometry.base + +import openeo.rest.datacube +from openeo.api.process import Parameter +from openeo.internal.documentation import openeo_process +from openeo.internal.graph_building import PGNode +from openeo.internal.warnings import legacy_alias +from openeo.metadata import CollectionMetadata, CubeMetadata, Dimension +from openeo.rest._datacube import ( + THIS, + UDF, + _ensure_save_result, + _ProcessGraphAbstraction, + build_child_callback, +) +from openeo.rest.job import BatchJob +from openeo.rest.mlmodel import MlModel +from openeo.util import InvalidBBoxException, dict_no_none, guess_format, to_bbox_dict + +if typing.TYPE_CHECKING: + # Imports for type checking only (circular import issue at runtime). 
+ from openeo import Connection + + +class VectorCube(_ProcessGraphAbstraction): + """ + A Vector Cube, or 'Vector Collection' is a data structure containing 'Features': + https://www.w3.org/TR/sdw-bp/#dfn-feature + + The features in this cube are restricted to have a geometry. Geometries can be points, lines, polygons etcetera. + A geometry is specified in a 'coordinate reference system'. https://www.w3.org/TR/sdw-bp/#dfn-coordinate-reference-system-(crs) + """ + + _DEFAULT_VECTOR_FORMAT = "GeoJSON" + + def __init__(self, graph: PGNode, connection: Union[Connection, None], metadata: Optional[CubeMetadata] = None): + super().__init__(pgnode=graph, connection=connection) + self.metadata = metadata + + @classmethod + def _build_metadata(cls, add_properties: bool = False) -> CollectionMetadata: + """Helper to build a (minimal) `CollectionMetadata` object.""" + # Vector cubes have at least a "geometry" dimension + dimensions = [Dimension(name="geometry", type="geometry")] + if add_properties: + dimensions.append(Dimension(name="properties", type="other")) + # TODO #464: use a more generic metadata container than "collection" metadata + return CollectionMetadata(metadata={}, dimensions=dimensions) + + def process( + self, + process_id: str, + arguments: dict = None, + metadata: Optional[CollectionMetadata] = None, + namespace: Optional[str] = None, + **kwargs, + ) -> VectorCube: + """ + Generic helper to create a new VectorCube by applying a process. + + :param process_id: process id of the process. + :param args: argument dictionary for the process. 
+ :return: new VectorCube instance + """ + pg = self._build_pgnode(process_id=process_id, arguments=arguments, namespace=namespace, **kwargs) + return VectorCube(graph=pg, connection=self._connection, metadata=metadata or self.metadata) + + @classmethod + @openeo_process + def load_geojson( + cls, + connection: Connection, + data: Union[dict, str, pathlib.Path, shapely.geometry.base.BaseGeometry, Parameter], + properties: Optional[List[str]] = None, + ) -> VectorCube: + """ + Converts GeoJSON data as defined by RFC 7946 into a vector data cube. + + :param connection: the connection to use to connect with the openEO back-end. + :param data: the geometry to load. One of: + + - GeoJSON-style data structure: e.g. a dictionary with ``"type": "Polygon"`` and ``"coordinates"`` fields + - a path to a local GeoJSON file + - a GeoJSON string + - a shapely geometry object + + :param properties: A list of properties from the GeoJSON file to construct an additional dimension from. + :return: new VectorCube instance + + .. warning:: EXPERIMENTAL: this process is experimental with the potential for major things to change. + + .. versionadded:: 0.22.0 + """ + # TODO: unify with `DataCube._get_geometry_argument` + # TODO #457 also support client side fetching of GeoJSON from URL? + if isinstance(data, str) and data.strip().startswith("{"): + # Assume JSON dump + geometry = json.loads(data) + elif isinstance(data, (str, pathlib.Path)): + # Assume local file + with pathlib.Path(data).open(mode="r", encoding="utf-8") as f: + geometry = json.load(f) + assert isinstance(geometry, dict) + elif isinstance(data, shapely.geometry.base.BaseGeometry): + geometry = shapely.geometry.mapping(data) + elif isinstance(data, Parameter): + geometry = data + elif isinstance(data, dict): + geometry = data + else: + raise ValueError(data) + # TODO #457 client side verification of GeoJSON construct: valid type, valid structure, presence of CRS, ...? 
+ + pg = PGNode(process_id="load_geojson", data=geometry, properties=properties or []) + # TODO #457 always a "properties" dimension? https://github.com/Open-EO/openeo-processes/issues/448 + metadata = cls._build_metadata(add_properties=True) + return cls(graph=pg, connection=connection, metadata=metadata) + + @classmethod + @openeo_process + def load_url(cls, connection: Connection, url: str, format: str, options: Optional[dict] = None) -> VectorCube: + """ + Loads a file from a URL + + :param connection: the connection to use to connect with the openEO back-end. + :param url: The URL to read from. Authentication details such as API keys or tokens may need to be included in the URL. + :param format: The file format to use when loading the data. + :param options: The file format parameters to use when reading the data. + Must correspond to the parameters that the server reports as supported parameters for the chosen ``format`` + :return: new VectorCube instance + + .. warning:: EXPERIMENTAL: this process is experimental with the potential for major things to change. + + .. versionadded:: 0.22.0 + """ + pg = PGNode(process_id="load_url", arguments=dict_no_none(url=url, format=format, options=options)) + # TODO #457 always a "properties" dimension? https://github.com/Open-EO/openeo-processes/issues/448 + metadata = cls._build_metadata(add_properties=True) + return cls(graph=pg, connection=connection, metadata=metadata) + + @openeo_process + def run_udf( + self, + udf: Union[str, UDF], + runtime: Optional[str] = None, + version: Optional[str] = None, + context: Optional[dict] = None, + ) -> VectorCube: + """ + Run a UDF on the vector cube. + + It is recommended to provide the UDF just as :py:class:`UDF ` instance. + (the other arguments could be used to override UDF parameters if necessary). + + :param udf: UDF code as a string or :py:class:`UDF ` instance + :param runtime: UDF runtime + :param version: UDF version + :param context: UDF context + + .. 
warning:: EXPERIMENTAL: not generally supported, API subject to change. + + .. versionadded:: 0.10.0 + + .. versionchanged:: 0.16.0 + Added support to pass self-contained :py:class:`UDF ` instance. + """ + if isinstance(udf, UDF): + # `UDF` instance is preferred usage pattern, but allow overriding. + version = version or udf.version + context = context or udf.context + runtime = runtime or udf.get_runtime(connection=self.connection) + udf = udf.code + else: + if not runtime: + raise ValueError("Argument `runtime` must be specified") + return self.process( + process_id="run_udf", + data=self, udf=udf, runtime=runtime, + arguments=dict_no_none({"version": version, "context": context}), + ) + + @openeo_process + def save_result(self, format: Union[str, None] = "GeoJSON", options: dict = None): + # TODO #401: guard against duplicate save_result nodes? + return self.process( + process_id="save_result", + arguments={ + "data": self, + "format": format or "GeoJSON", + "options": options or {}, + }, + ) + + def execute(self, *, validate: Optional[bool] = None) -> dict: + """Executes the process graph.""" + return self._connection.execute(self.flat_graph(), validate=validate) + + def download( + self, + outputfile: Optional[Union[str, pathlib.Path]] = None, + format: Optional[str] = None, + options: Optional[dict] = None, + *, + validate: Optional[bool] = None, + auto_add_save_result: bool = True, + ) -> Union[None, bytes]: + """ + Execute synchronously and download the vector cube. + + The result will be stored to the output path, when specified. + If no output path (or ``None``) is given, the raw download content will be returned as ``bytes`` object. + + :param outputfile: (optional) output file to store the result to + :param format: (optional) output format to use. + :param options: (optional) additional output format options. 
+ :param validate: Optional toggle to enable/prevent validation of the process graphs before execution + (overruling the connection's ``auto_validate`` setting). + :param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet. + + .. versionchanged:: 0.21.0 + When not specified explicitly, output format is guessed from output file extension. + + .. versionchanged:: 0.32.0 + Added ``auto_add_save_result`` option + """ + # TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ... + cube = self + if auto_add_save_result: + cube = _ensure_save_result( + cube=cube, + format=format, + options=options, + weak_format=guess_format(outputfile) if outputfile else None, + default_format=self._DEFAULT_VECTOR_FORMAT, + method="VectorCube.download()", + ) + return self._connection.download(cube.flat_graph(), outputfile=outputfile, validate=validate) + + def execute_batch( + self, + outputfile: Optional[Union[str, pathlib.Path]] = None, + out_format: Optional[str] = None, + *, + title: Optional[str] = None, + description: Optional[str] = None, + plan: Optional[str] = None, + budget: Optional[float] = None, + print=print, + max_poll_interval: float = 60, + connection_retry_interval: float = 30, + additional: Optional[dict] = None, + job_options: Optional[dict] = None, + validate: Optional[bool] = None, + auto_add_save_result: bool = True, + # TODO: avoid using kwargs as format options + **format_options, + ) -> BatchJob: + """ + Evaluate the process graph by creating a batch job, and retrieving the results when it is finished. + This method is mostly recommended if the batch job is expected to run in a reasonable amount of time. + + For very long running jobs, you probably do not want to keep the client running. 
+ + :param additional: additional (top-level) properties to set in the request body + :param job_options: dictionary of job options to pass to the backend + (under top-level property "job_options") + :param outputfile: The path of a file to which a result can be written + :param out_format: (optional) output format to use. + :param format_options: (optional) additional output format options + :param validate: Optional toggle to enable/prevent validation of the process graphs before execution + (overruling the connection's ``auto_validate`` setting). + :param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet. + + .. versionchanged:: 0.21.0 + When not specified explicitly, output format is guessed from output file extension. + + .. versionchanged:: 0.32.0 + Added ``auto_add_save_result`` option + + .. versionadded:: 0.36.0 + Added argument ``additional``. + """ + cube = self + if auto_add_save_result: + cube = _ensure_save_result( + cube=cube, + format=out_format, + options=format_options, + weak_format=guess_format(outputfile) if outputfile else None, + default_format=self._DEFAULT_VECTOR_FORMAT, + method="VectorCube.execute_batch()", + ) + job = cube.create_job( + title=title, + description=description, + plan=plan, + budget=budget, + additional=additional, + job_options=job_options, + validate=validate, + auto_add_save_result=False, + ) + return job.run_synchronous( + # TODO #135 support multi file result sets too + outputfile=outputfile, + print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval + ) + + def create_job( + self, + out_format: Optional[str] = None, + *, + title: Optional[str] = None, + description: Optional[str] = None, + plan: Optional[str] = None, + budget: Optional[float] = None, + additional: Optional[dict] = None, + job_options: Optional[dict] = None, + validate: Optional[bool] = None, + auto_add_save_result: bool = True, + **format_options, + ) 
-> BatchJob: + """ + Sends a job to the backend and returns a ClientJob instance. + + :param out_format: String Format of the job result. + :param title: job title + :param description: job description + :param plan: The billing plan to process and charge the job with + :param budget: Maximum budget to be spent on executing the job. + Note that some backends do not honor this limit. + :param additional: additional (top-level) properties to set in the request body + :param job_options: dictionary of job options to pass to the backend + (under top-level property "job_options") + :param format_options: String Parameters for the job result format + :param validate: Optional toggle to enable/prevent validation of the process graphs before execution + (overruling the connection's ``auto_validate`` setting). + :param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet. + + :return: Created job. + + .. versionchanged:: 0.32.0 + Added ``auto_add_save_result`` option + """ + # TODO: avoid using all kwargs as format_options + # TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ... + cube = self + if auto_add_save_result: + cube = _ensure_save_result( + cube=cube, + format=out_format, + options=format_options or None, + default_format=self._DEFAULT_VECTOR_FORMAT, + method="VectorCube.create_job()", + ) + return self._connection.create_job( + process_graph=cube.flat_graph(), + title=title, + description=description, + plan=plan, + budget=budget, + additional=additional, + job_options=job_options, + validate=validate, + ) + + send_job = legacy_alias(create_job, name="send_job", since="0.10.0") + + @openeo_process + def filter_bands(self, bands: List[str]) -> VectorCube: + """ + .. 
versionadded:: 0.22.0 + """ + # TODO #459 docs + return self.process( + process_id="filter_bands", + arguments={"data": THIS, "bands": bands}, + ) + + @openeo_process + def filter_bbox( + self, + *, + west: Optional[float] = None, + south: Optional[float] = None, + east: Optional[float] = None, + north: Optional[float] = None, + extent: Optional[Union[dict, List[float], Tuple[float, float, float, float], Parameter]] = None, + crs: Optional[int] = None, + ) -> VectorCube: + """ + .. versionadded:: 0.22.0 + """ + # TODO #459 docs + if any(c is not None for c in [west, south, east, north]): + if extent is not None: + raise InvalidBBoxException("Don't specify both west/south/east/north and extent") + extent = dict_no_none(west=west, south=south, east=east, north=north) + + if isinstance(extent, Parameter): + pass + else: + extent = to_bbox_dict(extent, crs=crs) + return self.process( + process_id="filter_bbox", + arguments={"data": THIS, "extent": extent}, + ) + + @openeo_process + def filter_labels( + self, condition: Union[PGNode, Callable], dimension: str, context: Optional[dict] = None + ) -> VectorCube: + """ + Filters the dimension labels in the data cube for the given dimension. + Only the dimension labels that match the specified condition are preserved, + all other labels with their corresponding data get removed. + + :param condition: the "child callback" which will be given a single label value (number or string) + and returns a boolean expressing if the label should be preserved. + Also see :ref:`callbackfunctions`. + :param dimension: The name of the dimension to filter on. + + .. 
versionadded:: 0.22.0 + """ + condition = build_child_callback(condition, parent_parameters=["value"]) + return self.process( + process_id="filter_labels", + arguments=dict_no_none(data=THIS, condition=condition, dimension=dimension, context=context), + ) + + @openeo_process + def filter_vector( + self, geometries: Union["VectorCube", shapely.geometry.base.BaseGeometry, dict], relation: str = "intersects" + ) -> VectorCube: + """ + .. versionadded:: 0.22.0 + """ + # TODO #459 docs + if not isinstance(geometries, (VectorCube, Parameter)): + geometries = self.load_geojson(connection=self.connection, data=geometries) + return self.process( + process_id="filter_vector", + arguments={"data": THIS, "geometries": geometries, "relation": relation}, + ) + + @openeo_process + def fit_class_random_forest( + self, + # TODO #279 #293: target type should be `VectorCube` (with adapters for GeoJSON FeatureCollection, GeoPandas, ...) + target: dict, + # TODO #293 max_variables officially has no default + max_variables: Optional[int] = None, + num_trees: int = 100, + seed: Optional[int] = None, + ) -> MlModel: + """ + Executes the fit of a random forest classification based on the user input of target and predictors. + The Random Forest classification model is based on the approach by Breiman (2001). + + .. warning:: EXPERIMENTAL: not generally supported, API subject to change. + + :param target: The training sites for the classification model as a vector data cube. This is associated with the target + variable for the Random Forest model. The geometry has to be associated with a value to predict (e.g. fractional + forest canopy cover). + :param max_variables: Specifies how many split variables will be used at a node. Default value is `null`, which corresponds to the + number of predictors divided by 3. + :param num_trees: The number of trees build within the Random Forest classification. + :param seed: A randomization seed to use for the random sampling in training. + + .. 
versionadded:: 0.16.0 + Originally added in version 0.10.0 as :py:class:`DataCube ` method, + but moved to :py:class:`VectorCube` in version 0.16.0. + """ + pgnode = PGNode( + process_id="fit_class_random_forest", + arguments=dict_no_none( + predictors=self, + # TODO #279 strictly per-spec, target should be a `vector-cube`, but due to lack of proper support we are limited to inline GeoJSON for now + target=target, + max_variables=max_variables, + num_trees=num_trees, + seed=seed, + ), + ) + model = MlModel(graph=pgnode, connection=self._connection) + return model + + @openeo_process + def fit_regr_random_forest( + self, + # TODO #279 #293: target type should be `VectorCube` (with adapters for GeoJSON FeatureCollection, GeoPandas, ...) + target: dict, + # TODO #293 max_variables officially has no default + max_variables: Optional[int] = None, + num_trees: int = 100, + seed: Optional[int] = None, + ) -> MlModel: + """ + Executes the fit of a random forest regression based on training data. + The Random Forest regression model is based on the approach by Breiman (2001). + + .. warning:: EXPERIMENTAL: not generally supported, API subject to change. + + :param target: The training sites for the regression model as a vector data cube. + This is associated with the target variable for the Random Forest model. + The geometry has to associated with a value to predict (e.g. fractional forest canopy cover). + :param max_variables: Specifies how many split variables will be used at a node. Default value is `null`, which corresponds to the + number of predictors divided by 3. + :param num_trees: The number of trees build within the Random Forest classification. + :param seed: A randomization seed to use for the random sampling in training. + + .. versionadded:: 0.16.0 + Originally added in version 0.10.0 as :py:class:`DataCube ` method, + but moved to :py:class:`VectorCube` in version 0.16.0. 
+ """ + # TODO #279 #293: `fit_class_random_forest` should be defined on VectorCube instead of DataCube + pgnode = PGNode( + process_id="fit_regr_random_forest", + arguments=dict_no_none( + predictors=self, + # TODO #279 strictly per-spec, target should be a `vector-cube`, but due to lack of proper support we are limited to inline GeoJSON for now + target=target, + max_variables=max_variables, + num_trees=num_trees, + seed=seed, + ), + ) + model = MlModel(graph=pgnode, connection=self._connection) + return model + + @openeo_process + def apply_dimension( + self, + process: Union[str, typing.Callable, UDF, PGNode], + dimension: str, + target_dimension: Optional[str] = None, + context: Optional[dict] = None, + ) -> VectorCube: + """ + Applies a process to all values along a dimension of a data cube. + For example, if the temporal dimension is specified the process will work on the values of a time series. + + The process to apply is specified by providing a callback function in the `process` argument. + + :param process: the "child callback": + the name of a single process, + or a callback function as discussed in :ref:`callbackfunctions`, + or a :py:class:`UDF ` instance. + + The callback should correspond to a process that + receives an array of numerical values + and returns an array of numerical values. + For example: + + - ``"sort"`` (string) + - :py:func:`sort ` (:ref:`predefined openEO process function `) + - ``lambda data: data.concat([42, -3])`` (function or lambda) + + + :param dimension: The name of the source dimension to apply the process on. Fails with a DimensionNotAvailable error if the specified dimension does not exist. + :param target_dimension: The name of the target dimension or null (the default) to use the source dimension + specified in the parameter dimension. By specifying a target dimension, the source dimension is removed. 
+ The target dimension with the specified name and the type other (see add_dimension) is created, if it doesn't exist yet. + :param context: Additional data to be passed to the process. + + :return: A datacube with the UDF applied to the given dimension. + :raises: DimensionNotAvailable + + .. versionadded:: 0.22.0 + """ + process = build_child_callback( + process=process, parent_parameters=["data", "context"], connection=self.connection + ) + arguments = dict_no_none( + { + "data": THIS, + "process": process, + "dimension": dimension, + "target_dimension": target_dimension, + "context": context, + } + ) + return self.process(process_id="apply_dimension", arguments=arguments) + + def vector_to_raster(self, target: openeo.rest.datacube.DataCube) -> openeo.rest.datacube.DataCube: + """ + Converts this vector cube (:py:class:`VectorCube`) into a raster data cube (:py:class:`~openeo.rest.datacube.DataCube`). + The bounding polygon of homogenous areas of pixels is constructed. + + :param target: a reference raster data cube to adopt the CRS/projection/resolution from. + + .. warning:: ``vector_to_raster`` is an experimental, non-standard process. It is not widely supported, and its API is subject to change. + + .. versionadded:: 0.28.0 + + """ + # TODO: this parameter sniffing is a temporary workaround until + # the `target` parameter name rename has fully settled + # https://github.com/Open-EO/openeo-python-driver/issues/274 + # After that has settled, it is still useful to verify assumptions about this non-standard process. 
+ try: + process_spec = self.connection.describe_process("vector_to_raster") + target_parameter = process_spec["parameters"][1]["name"] + assert "target" in target_parameter + except Exception: + target_parameter = "target" + + pg_node = PGNode( + process_id="vector_to_raster", + arguments={"data": self, target_parameter: target}, + ) + # TODO: the correct metadata has to be passed here: + # replace "geometry" dimension with spatial dimensions of the target cube + return openeo.rest.datacube.DataCube(pg_node, connection=self._connection, metadata=self.metadata) diff --git a/lib/openeo/testing/__init__.py b/lib/openeo/testing/__init__.py new file mode 100644 index 000000000..8ad898cba --- /dev/null +++ b/lib/openeo/testing/__init__.py @@ -0,0 +1,37 @@ +""" +Utilities for testing of openEO client workflows. +""" + +import json +from pathlib import Path +from typing import Callable, Optional, Union + + +class TestDataLoader: + """ + Helper to resolve paths to test data files, load them as JSON, optionally preprocess them, etc. + + It's intended to be used as a pytest fixture, e.g. from ``conftest.py``: + + .. code-block:: python + + @pytest.fixture + def test_data() -> TestDataLoader: + return TestDataLoader(root=Path(__file__).parent / "data") + + .. 
versionadded:: 0.30.0 + """ + + def __init__(self, root: Union[str, Path]): + self.data_root = Path(root) + + def get_path(self, filename: Union[str, Path]) -> Path: + """Get absolute path to a test data file""" + return self.data_root / filename + + def load_json(self, filename: Union[str, Path], preprocess: Optional[Callable[[str], str]] = None) -> dict: + """Parse data from a test JSON file""" + data = self.get_path(filename).read_text(encoding="utf8") + if preprocess: + data = preprocess(data) + return json.loads(data) diff --git a/lib/openeo/testing/results.py b/lib/openeo/testing/results.py new file mode 100644 index 000000000..633ddaf58 --- /dev/null +++ b/lib/openeo/testing/results.py @@ -0,0 +1,386 @@ +""" +Assert functions for comparing actual (batch job) results against expected reference data. +""" + +import json +import logging +import tempfile +from pathlib import Path +from typing import List, Optional, Union + +import xarray +import xarray.testing + +from openeo.rest.job import DEFAULT_JOB_RESULTS_FILENAME, BatchJob, JobResults +from openeo.util import repr_truncate + +_log = logging.getLogger(__name__) + + +_DEFAULT_RTOL = 1e-6 +_DEFAULT_ATOL = 1e-6 + + +def _load_xarray_netcdf(path: Union[str, Path], **kwargs) -> xarray.Dataset: + """ + Load a netCDF file as Xarray Dataset + """ + _log.debug(f"_load_xarray_netcdf: {path!r}") + return xarray.load_dataset(path, **kwargs) + + +def _load_rioxarray_geotiff(path: Union[str, Path], **kwargs) -> xarray.DataArray: + """ + Load a GeoTIFF file as Xarray DataArray (using `rioxarray` extension). + """ + _log.debug(f"_load_rioxarray_geotiff: {path!r}") + try: + import rioxarray + except ImportError as e: + raise ImportError("This feature requires 'rioxarray` as optional dependency.") from e + return rioxarray.open_rasterio(path, **kwargs) + + +def _load_xarray(path: Union[str, Path], **kwargs) -> Union[xarray.Dataset, xarray.DataArray]: + """ + Generically load a netCDF/GeoTIFF file as Xarray Dataset/DataArray. 
+ """ + path = Path(path) + if path.suffix.lower() in {".nc", ".netcdf"}: + return _load_xarray_netcdf(path, **kwargs) + elif path.suffix.lower() in {".tif", ".tiff", ".gtiff", ".geotiff"}: + return _load_rioxarray_geotiff(path, **kwargs) + raise ValueError(f"Unsupported file type: {path}") + + +def _load_json(path: Union[str, Path]) -> dict: + """ + Load a JSON file. + """ + with Path(path).open("r", encoding="utf-8") as f: + return json.load(f) + + +def _as_xarray_dataset(data: Union[str, Path, xarray.Dataset]) -> xarray.Dataset: + """ + Get data as Xarray Dataset (loading from file if needed). + """ + if isinstance(data, (str, Path)): + data = _load_xarray(data) + # TODO auto-convert DataArray to Dataset? + if not isinstance(data, xarray.Dataset): + raise ValueError(f"Unsupported type: {type(data)}") + return data + + +def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.DataArray: + """ + Convert a path to a NetCDF/GeoTIFF file to an Xarray DataArray. + + :param data: path to a NetCDF/GeoTIFF file or Xarray DataArray + :return: Xarray DataArray + """ + if isinstance(data, (str, Path)): + data = _load_xarray(data) + # TODO: auto-convert Dataset to DataArray? + if not isinstance(data, xarray.DataArray): + raise ValueError(f"Unsupported type: {type(data)}") + return data + + +def _compare_xarray_dataarray( + actual: Union[xarray.DataArray, str, Path], + expected: Union[xarray.DataArray, str, Path], + *, + rtol: float = _DEFAULT_RTOL, + atol: float = _DEFAULT_ATOL, +) -> List[str]: + """ + Compare two xarray DataArrays with tolerance and report mismatch issues (as strings) + + Checks that are done (with tolerance): + - (optional) Check fraction of mismatching pixels (difference exceeding some tolerance). + If fraction is below a given threshold, ignore these mismatches in subsequent comparisons. + If fraction is above the threshold, report this issue. 
+ - Compare actual and expected data with `xarray.testing.assert_allclose` and specified tolerances. + + :return: list of issues (empty if no issues) + """ + # TODO: make this a public function? + # TODO: option for nodata fill value? + # TODO: option to include data type check? + # TODO: option to cast to some data type (or even rescale) before comparison? + # TODO: also compare attributes of the DataArray? + actual = _as_xarray_dataarray(actual) + expected = _as_xarray_dataarray(expected) + issues = [] + + # `xarray.testing.assert_allclose` currently does not always + # provides detailed information about shape/dimension mismatches + # so we enrich the issue listing with some more details + if actual.dims != expected.dims: + issues.append(f"Dimension mismatch: {actual.dims} != {expected.dims}") + for dim in sorted(set(expected.dims).intersection(actual.dims)): + acs = actual.coords[dim].values + ecs = expected.coords[dim].values + if not (acs.shape == ecs.shape and (acs == ecs).all()): + issues.append(f"Coordinates mismatch for dimension {dim!r}: {acs} != {ecs}") + if actual.shape != expected.shape: + issues.append(f"Shape mismatch: {actual.shape} != {expected.shape}") + + try: + xarray.testing.assert_allclose(a=actual, b=expected, rtol=rtol, atol=atol) + except AssertionError as e: + # TODO: message of `assert_allclose` is typically multiline, split it again or make it one line? + issues.append(str(e).strip()) + + return issues + + +def assert_xarray_dataarray_allclose( + actual: Union[xarray.DataArray, str, Path], + expected: Union[xarray.DataArray, str, Path], + *, + rtol: float = _DEFAULT_RTOL, + atol: float = _DEFAULT_ATOL, +): + """ + Assert that two Xarray ``DataArray`` instances are equal (with tolerance). + + :param actual: actual data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file. + :param expected: expected or reference data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file. 
+ :param rtol: relative tolerance + :param atol: absolute tolerance + :raises AssertionError: if not equal within the given tolerance + + .. versionadded:: 0.31.0 + + .. warning:: + This function is experimental and subject to change. + """ + issues = _compare_xarray_dataarray(actual=actual, expected=expected, rtol=rtol, atol=atol) + if issues: + raise AssertionError("\n".join(issues)) + + +def _compare_xarray_datasets( + actual: Union[xarray.Dataset, str, Path], + expected: Union[xarray.Dataset, str, Path], + *, + rtol: float = _DEFAULT_RTOL, + atol: float = _DEFAULT_ATOL, +) -> List[str]: + """ + Compare two xarray ``DataSet``s with tolerance and report mismatch issues (as strings) + + :return: list of issues (empty if no issues) + """ + # TODO: make this a public function? + actual = _as_xarray_dataset(actual) + expected = _as_xarray_dataset(expected) + + all_issues = [] + # TODO: just leverage DataSet support in xarray.testing.assert_allclose for all this? + actual_vars = set(actual.data_vars) + expected_vars = set(expected.data_vars) + _log.debug(f"_compare_xarray_datasets: actual_vars={actual_vars!r} expected_vars={expected_vars!r}") + if actual_vars != expected_vars: + all_issues.append(f"Xarray DataSet variables mismatch: {actual_vars} != {expected_vars}") + for var in expected_vars.intersection(actual_vars): + _log.debug(f"_compare_xarray_datasets: comparing variable {var!r}") + issues = _compare_xarray_dataarray(actual[var], expected[var], rtol=rtol, atol=atol) + if issues: + all_issues.append(f"Issues for variable {var!r}:") + all_issues.extend(issues) + return all_issues + + +def assert_xarray_dataset_allclose( + actual: Union[xarray.Dataset, str, Path], + expected: Union[xarray.Dataset, str, Path], + *, + rtol: float = _DEFAULT_RTOL, + atol: float = _DEFAULT_ATOL, +): + """ + Assert that two Xarray ``DataSet`` instances are equal (with tolerance). 
+ + :param actual: actual data, provided as Xarray Dataset object or path to NetCDF/GeoTIFF file + :param expected: expected or reference data, provided as Xarray Dataset object or path to NetCDF/GeoTIFF file. + :param rtol: relative tolerance + :param atol: absolute tolerance + :raises AssertionError: if not equal within the given tolerance + + .. versionadded:: 0.31.0 + + .. warning:: + This function is experimental and subject to change. + """ + issues = _compare_xarray_datasets(actual=actual, expected=expected, rtol=rtol, atol=atol) + if issues: + raise AssertionError("\n".join(issues)) + + +def assert_xarray_allclose( + actual: Union[xarray.Dataset, xarray.DataArray, str, Path], + expected: Union[xarray.Dataset, xarray.DataArray, str, Path], + *, + rtol: float = _DEFAULT_RTOL, + atol: float = _DEFAULT_ATOL, +): + """ + Assert that two Xarray ``DataSet`` or ``DataArray`` instances are equal (with tolerance). + + :param actual: actual data, provided as Xarray object or path to NetCDF/GeoTIFF file. + :param expected: expected or reference data, provided as Xarray object or path to NetCDF/GeoTIFF file. + :param rtol: relative tolerance + :param atol: absolute tolerance + :raises AssertionError: if not equal within the given tolerance + + .. versionadded:: 0.31.0 + + .. warning:: + This function is experimental and subject to change. 
+ """ + if isinstance(actual, (str, Path)): + actual = _load_xarray(actual) + if isinstance(expected, (str, Path)): + expected = _load_xarray(expected) + + if isinstance(actual, xarray.Dataset) and isinstance(expected, xarray.Dataset): + assert_xarray_dataset_allclose(actual, expected, rtol=rtol, atol=atol) + elif isinstance(actual, xarray.DataArray) and isinstance(expected, xarray.DataArray): + assert_xarray_dataarray_allclose(actual, expected, rtol=rtol, atol=atol) + else: + raise ValueError(f"Unsupported types: {type(actual)} and {type(expected)}") + + +def _as_job_results_download( + job_results: Union[BatchJob, JobResults, str, Path], tmp_path: Optional[Path] = None +) -> Path: + """ + Produce a directory with downloaded job results assets and metadata. + + :param job_results: a batch job, job results metadata object or a path + :param tmp_path: root temp path to download results if needed + :return: + """ + # TODO: support download/copy from other sources (e.g. S3, ...) + if isinstance(job_results, BatchJob): + job_results = job_results.get_results() + if isinstance(job_results, JobResults): + download_dir = tempfile.mkdtemp(dir=tmp_path, prefix=job_results.get_job_id() + "-") + _log.info(f"Downloading results from job {job_results.get_job_id()} to {download_dir}") + job_results.download_files(target=download_dir) + job_results = download_dir + if isinstance(job_results, (str, Path)): + return Path(job_results) + else: + raise ValueError(f"Unsupported type: {type(job_results)}") + + +def _compare_job_results( + actual: Union[BatchJob, JobResults, str, Path], + expected: Union[BatchJob, JobResults, str, Path], + *, + rtol: float = _DEFAULT_RTOL, + atol: float = _DEFAULT_ATOL, + tmp_path: Optional[Path] = None, +) -> List[str]: + """ + Compare two job results sets (directories with downloaded assets and metadata, + e.g. 
as produced by ``JobResults.download_files()``) + + :return: list of issues (empty if no issues) + """ + actual_dir = _as_job_results_download(actual, tmp_path=tmp_path) + expected_dir = _as_job_results_download(expected, tmp_path=tmp_path) + _log.info(f"Comparing job results: {actual_dir!r} vs {expected_dir!r}") + + all_issues = [] + + actual_filenames = set(p.name for p in actual_dir.glob("*") if p.is_file()) + expected_filenames = set(p.name for p in expected_dir.glob("*") if p.is_file()) + if actual_filenames != expected_filenames: + all_issues.append(f"File set mismatch: {actual_filenames} != {expected_filenames}") + + for filename in expected_filenames.intersection(actual_filenames): + actual_path = actual_dir / filename + expected_path = expected_dir / filename + if filename == DEFAULT_JOB_RESULTS_FILENAME: + issues = _compare_job_result_metadata(actual=actual_path, expected=expected_path) + if issues: + all_issues.append(f"Issues for metadata file {filename!r}:") + all_issues.extend(issues) + elif expected_path.suffix.lower() in {".nc", ".netcdf"}: + issues = _compare_xarray_datasets(actual=actual_path, expected=expected_path, rtol=rtol, atol=atol) + if issues: + all_issues.append(f"Issues for file {filename!r}:") + all_issues.extend(issues) + elif expected_path.suffix.lower() in {".tif", ".tiff", ".gtiff", ".geotiff"}: + issues = _compare_xarray_dataarray(actual=actual_path, expected=expected_path, rtol=rtol, atol=atol) + if issues: + all_issues.append(f"Issues for file {filename!r}:") + all_issues.extend(issues) + else: + _log.warning(f"Unhandled job result asset {filename!r}") + + return all_issues + + +def _compare_job_result_metadata( + actual: Union[str, Path], + expected: Union[str, Path], +) -> List[str]: + issues = [] + actual_metadata = _load_json(actual) + expected_metadata = _load_json(expected) + + # Check "derived_from" links + actual_derived_from = set(k["href"] for k in actual_metadata.get("links", []) if k["rel"] == "derived_from") + 
expected_derived_from = set(k["href"] for k in expected_metadata.get("links", []) if k["rel"] == "derived_from") + + if actual_derived_from != expected_derived_from: + actual_only = actual_derived_from - expected_derived_from + expected_only = expected_derived_from - actual_derived_from + common = actual_derived_from.intersection(expected_derived_from) + issues.append( + f"Differing 'derived_from' links ({len(common)} common, {len(actual_only)} only in actual, {len(expected_only)} only in expected):\n" + f" only in actual: {repr_truncate(actual_only, width=1000)}\n" + f" only in expected: {repr_truncate(expected_only, width=1000)}." + ) + + # TODO: more metadata checks (e.g. spatial and temporal extents)? + + return issues + + +def assert_job_results_allclose( + actual: Union[BatchJob, JobResults, str, Path], + expected: Union[BatchJob, JobResults, str, Path], + *, + rtol: float = _DEFAULT_RTOL, + atol: float = _DEFAULT_ATOL, + tmp_path: Optional[Path] = None, +): + """ + Assert that two job results sets are equal (with tolerance). + + :param actual: actual job results, provided as :py:class:`~openeo.rest.job.BatchJob` object, + :py:meth:`~openeo.rest.job.JobResults` object or path to directory with downloaded assets. + :param expected: expected job results, provided as :py:class:`~openeo.rest.job.BatchJob` object, + :py:meth:`~openeo.rest.job.JobResults` object or path to directory with downloaded assets. + :param rtol: relative tolerance + :param atol: absolute tolerance + :param tmp_path: root temp path to download results if needed. + It's recommended to pass pytest's `tmp_path` fixture here + :raises AssertionError: if not equal within the given tolerance + + .. versionadded:: 0.31.0 + + .. warning:: + This function is experimental and subject to change. 
+ """ + issues = _compare_job_results(actual, expected, rtol=rtol, atol=atol, tmp_path=tmp_path) + if issues: + raise AssertionError("\n".join(issues)) diff --git a/lib/openeo/testing/stac.py b/lib/openeo/testing/stac.py new file mode 100644 index 000000000..4f0b455a8 --- /dev/null +++ b/lib/openeo/testing/stac.py @@ -0,0 +1,110 @@ +from typing import List, Optional, Union + + +class StacDummyBuilder: + """ + Helper to compactly produce STAC Item/Collection/Catalog/... dicts for test purposes + + .. warning:: + This is an experimental API subject to change. + """ + + _EXT_DATACUBE = "https://stac-extensions.github.io/datacube/v2.2.0/schema.json" + + @classmethod + def item( + cls, + *, + id: str = "item123", + stac_version="1.0.0", + datetime: str = "2024-03-08", + properties: Optional[dict] = None, + cube_dimensions: Optional[dict] = None, + stac_extensions: Optional[List[str]] = None, + **kwargs, + ) -> dict: + """Create a STAC Item represented as dictionary.""" + properties = properties or {} + properties.setdefault("datetime", datetime) + + if cube_dimensions is not None: + properties["cube:dimensions"] = cube_dimensions + stac_extensions = cls._add_stac_extension(stac_extensions, cls._EXT_DATACUBE) + + d = { + "type": "Feature", + "stac_version": stac_version, + "id": id, + "geometry": None, + "properties": properties, + "links": [], + "assets": {}, + **kwargs, + } + + if stac_extensions is not None: + d["stac_extensions"] = stac_extensions + return d + + @classmethod + def _add_stac_extension(cls, stac_extensions: Union[List[str], None], stac_extension: str) -> List[str]: + stac_extensions = list(stac_extensions or []) + if stac_extension not in stac_extensions: + stac_extensions.append(stac_extension) + return stac_extensions + + @classmethod + def collection( + cls, + *, + id: str = "collection123", + description: str = "Collection 123", + stac_version: str = "1.0.0", + stac_extensions: Optional[List[str]] = None, + license: str = "proprietary", + extent: 
Optional[dict] = None, + cube_dimensions: Optional[dict] = None, + summaries: Optional[dict] = None, + ) -> dict: + """Create a STAC Collection represented as dictionary.""" + if extent is None: + extent = {"spatial": {"bbox": [[3, 4, 5, 6]]}, "temporal": {"interval": [["2024-01-01", "2024-05-05"]]}} + + d = { + "type": "Collection", + "stac_version": stac_version, + "id": id, + "description": description, + "license": license, + "extent": extent, + "links": [], + } + if cube_dimensions is not None: + d["cube:dimensions"] = cube_dimensions + stac_extensions = cls._add_stac_extension(stac_extensions, cls._EXT_DATACUBE) + if summaries is not None: + d["summaries"] = summaries + if stac_extensions is not None: + d["stac_extensions"] = stac_extensions + return d + + @classmethod + def catalog( + cls, + *, + id: str = "catalog123", + stac_version: str = "1.0.0", + description: str = "Catalog 123", + stac_extensions: Optional[List[str]] = None, + ) -> dict: + """Create a STAC Catalog represented as dictionary.""" + d = { + "type": "Catalog", + "stac_version": stac_version, + "id": id, + "description": description, + "links": [], + } + if stac_extensions is not None: + d["stac_extensions"] = stac_extensions + return d diff --git a/lib/openeo/udf/__init__.py b/lib/openeo/udf/__init__.py new file mode 100644 index 000000000..387b8bc3d --- /dev/null +++ b/lib/openeo/udf/__init__.py @@ -0,0 +1,13 @@ +from openeo import BaseOpenEoException + + +class OpenEoUdfException(BaseOpenEoException): + pass + + +from openeo.udf.debug import inspect +from openeo.udf.feature_collection import FeatureCollection +from openeo.udf.run_code import execute_local_udf, run_udf_code +from openeo.udf.structured_data import StructuredData +from openeo.udf.udf_data import UdfData +from openeo.udf.xarraydatacube import XarrayDataCube diff --git a/lib/openeo/udf/_compat.py b/lib/openeo/udf/_compat.py new file mode 100644 index 000000000..72a73a020 --- /dev/null +++ b/lib/openeo/udf/_compat.py @@ -0,0 
+1,65 @@ +import json +import re +from typing import Union + +# TODO #465 move this to a more general utility subpackage? + +try: + import tomllib +except ImportError: + try: + import tomli as tomllib + except ImportError: + # Will be assigned with fallback implementation below + tomllib = None + + +class FlimsyTomlParser: + """ + This is a rudimentary, low-tech, incomplete implementation of TOML parsing functionality + for simple TOML use cases where the dependency on a full-fledged TOML library is not justified. + For these simple uses cases, it should act as a best-effort drop-in replacement + for the `loads()` functionality from full-fledged TOML libraries + like `tomllib` (part of standard library since Python 3.11) + or `tomli` (`tomllib` backport for earlier Python versions). + """ + + class TomlParseError(ValueError): + pass + + KEY_PAIR_REGEX = re.compile( + r"(?P^[a-z0-9_-]+)\s*=\s*(?P.*(\s+^\s+.*)*(\s+^])?)", + flags=re.MULTILINE | re.VERBOSE | re.IGNORECASE, + ) + + @classmethod + def loads(cls, data: str) -> dict: + if re.search(r"^\[", data, flags=re.MULTILINE): + raise cls.TomlParseError("Tables are not supported") + if re.search(r"^[a-z0-9_-]+\.[a-z0-9_.-]+\s*=", data, flags=re.MULTILINE | re.IGNORECASE): + raise cls.TomlParseError("Dotted keys are not supported") + return { + match.group("key"): cls._parse_toml_value_like_json(match.group("value")) + for match in cls.KEY_PAIR_REGEX.finditer(data) + } + + @classmethod + def _parse_toml_value_like_json(cls, value: str) -> Union[int, float, list]: + """ + Try to parse a TOML value by pretending it's (almost) JSON, + which covers the basics (simple strings, numbers, arrays, a bit of nesting, ...) + """ + # A bit of preprocessing to make it more JSON-like (strip comments, strip trailing commas) + value = re.sub(r"#.*$", "", value, flags=re.MULTILINE) + value = re.sub(r",\s*\]", "]", value) + # Rudimentarily convert single quote strings to double quotes. 
+ value = re.sub("'([^'\"]*)'", r'"\1"', value) + try: + data = json.loads(value) + except json.JSONDecodeError as e: + raise cls.TomlParseError(f"Failed to parse TOML value {value!r}") from e + return data + + +if tomllib is None: + tomllib = FlimsyTomlParser diff --git a/lib/openeo/udf/debug.py b/lib/openeo/udf/debug.py new file mode 100644 index 000000000..3cb408494 --- /dev/null +++ b/lib/openeo/udf/debug.py @@ -0,0 +1,30 @@ +""" +Debug utilities for UDFs +""" +import logging +import os +import sys + +_log = logging.getLogger(__name__) +_user_log = logging.getLogger(os.environ.get("OPENEO_UDF_USER_LOGGER", f"{__name__}.user")) + + +def inspect(data=None, message: str = "", code: str = "User", level: str = "info"): + """ + Implementation of the openEO `inspect` process for UDF contexts. + + Note that it is up to the back-end implementation to properly capture this logging + and include it in the batch job logs. + + :param data: data to log + :param message: message to send in addition to the data + :param code: A label to help identify one or more log entries + :param level: The severity level of this message. Allowed values: "error", "warning", "info", "debug" + + .. versionadded:: 0.10.1 + + .. 
seealso:: :ref:`udf_logging_with_inspect` + """ + extra = {"data": data, "code": code} + kwargs = {"stacklevel": 2} if sys.version_info >= (3, 8) else {} + _user_log.log(level=logging.getLevelName(level.upper()), msg=message, extra=extra, **kwargs) diff --git a/lib/openeo/udf/feature_collection.py b/lib/openeo/udf/feature_collection.py new file mode 100644 index 000000000..329c618cc --- /dev/null +++ b/lib/openeo/udf/feature_collection.py @@ -0,0 +1,110 @@ +""" + +""" + +# Note: this module was initially developed under the ``openeo-udf`` project (https://github.com/Open-EO/openeo-udf) +from __future__ import annotations + +from typing import Any, List, Optional, Union + +import pandas +import shapely.geometry + +# Geopandas is optional dependency for now +try: + from geopandas import GeoDataFrame +except ImportError: + class GeoDataFrame: + pass + + +class FeatureCollection: + """ + A feature collection that represents a subset or a whole feature collection + where single vector features may have time stamps assigned. + """ + + def __init__( + self, + id: str, + data: GeoDataFrame, + start_times: Optional[Union[pandas.DatetimeIndex, List[str]]] = None, + end_times: Optional[Union[pandas.DatetimeIndex, List[str]]] = None + ): + """ + Constructor of the of a vector collection + + :param id: The unique id of the vector collection + :param data: A GeoDataFrame with geometry column and attribute data + :param start_times: The vector with start times for each spatial x,y slice + :param end_times: The pandas.DateTimeIndex vector with end times + for each spatial x,y slice, if no + end times are defined, then time instances are assumed not intervals + """ + # TODO #455 `id` is first and a required argument, but it's unclear what it can/should be used for. Can we eliminate it? + self.id = id + self._data = data + # TODO #455 why not include these datetimes directly in the dataframe? 
+ self._start_times = self._as_datetimeindex(start_times, expected_length=len(self.data)) + self._end_times = self._as_datetimeindex(end_times, expected_length=len(self.data)) + + def __repr__(self): + return f"<{type(self).__name__} with {type(self._data).__name__}>" + + @staticmethod + def _as_datetimeindex(dates: Any, expected_length: int = None) -> Union[pandas.DatetimeIndex, None]: + if dates is None: + return dates + if not isinstance(dates, pandas.DatetimeIndex): + dates = pandas.DatetimeIndex(dates) + if expected_length is not None and expected_length != len(dates): + raise ValueError("Expected size {e} but got {a}: {d}".format(e=expected_length, a=len(dates), d=dates)) + return dates + + @property + def data(self) -> GeoDataFrame: + """ + Get the geopandas.GeoDataFrame that contains the geometry column and any number of attribute columns + + :return: A data frame that contains the geometry column and any number of attribute columns + """ + return self._data + + @property + def start_times(self) -> Union[pandas.DatetimeIndex, None]: + return self._start_times + + @property + def end_times(self) -> Union[pandas.DatetimeIndex, None]: + return self._end_times + + def to_dict(self) -> dict: + """ + Convert this FeatureCollection into a dictionary that can be converted into + a valid JSON representation + """ + data = { + "id": self.id, + "data": shapely.geometry.mapping(self.data), + } + if self.start_times is not None: + data["start_times"] = [t.isoformat() for t in self.start_times] + if self.end_times is not None: + data["end_times"] = [t.isoformat() for t in self.end_times] + return data + + @classmethod + def from_dict(cls, data: dict) -> FeatureCollection: + """ + Create a feature collection from a python dictionary that was created from + the JSON definition of the FeatureCollection + + :param data: The dictionary that contains the feature collection definition + :return: A new FeatureCollection object + """ + return cls( + id=data["id"], + 
data=GeoDataFrame.from_features(data["data"]), + start_times=data.get("start_times"), + end_times=data.get("end_times"), + ) diff --git a/lib/openeo/udf/run_code.py b/lib/openeo/udf/run_code.py new file mode 100644 index 000000000..6c0657dd1 --- /dev/null +++ b/lib/openeo/udf/run_code.py @@ -0,0 +1,328 @@ +""" + +Note: this module was initially developed under the ``openeo-udf`` project (https://github.com/Open-EO/openeo-udf) +""" + +import functools +import inspect +import logging +import math +import pathlib +import re +from typing import Callable, List, Union + +import numpy +import pandas +import shapely +import xarray +from pandas import Series + +import openeo +from openeo import UDF +from openeo.udf import OpenEoUdfException +from openeo.udf._compat import tomllib +from openeo.udf.feature_collection import FeatureCollection +from openeo.udf.structured_data import StructuredData +from openeo.udf.udf_data import UdfData +from openeo.udf.xarraydatacube import XarrayDataCube + +_log = logging.getLogger(__name__) + + +def _build_default_execution_context(): + # TODO: is it really necessary to "pre-load" these modules? Isn't user going to import them explicitly in their script anyway? + context = { + "numpy": numpy, "np": numpy, + "xarray": xarray, + "pandas": pandas, "pd": pandas, + "shapely": shapely, + "math": math, + "UdfData": UdfData, + "XarrayDataCube": XarrayDataCube, + "DataCube": XarrayDataCube, # Legacy alias + "StructuredData": StructuredData, + "FeatureCollection": FeatureCollection, + # "SpatialExtent": SpatialExtent, # TODO? + # "MachineLearnModel": MachineLearnModelConfig, # TODO? + } + + + return context + + +@functools.lru_cache(maxsize=100) +def load_module_from_string(code: str) -> dict: + """ + Experimental: avoid loading same UDF module more than once, to make caching inside the udf work. 
+ @param code: + @return: + """ + globals = _build_default_execution_context() + exec(code, globals) + return globals + + +def _get_annotation_str(annotation: Union[str, type]) -> str: + """Get parameter annotation as a string""" + if isinstance(annotation, str): + return annotation + elif isinstance(annotation, type): + mod = annotation.__module__ + return (mod + "." if mod != str.__module__ else "") + annotation.__name__ + else: + return str(annotation) + + +def _annotation_is_pandas_series(annotation) -> bool: + return annotation in {pandas.Series, _get_annotation_str(pandas.Series)} + + +def _annotation_is_udf_datacube(annotation) -> bool: + return annotation is XarrayDataCube or _get_annotation_str(annotation) in { + _get_annotation_str(XarrayDataCube), + 'openeo_udf.api.datacube.DataCube', # Legacy `openeo_udf` annotation + } + +def _annotation_is_data_array(annotation) -> bool: + return annotation is xarray.DataArray or _get_annotation_str(annotation) in { + _get_annotation_str(xarray.DataArray) + } + +def _annotation_is_udf_data(annotation) -> bool: + return annotation is UdfData or _get_annotation_str(annotation) in { + _get_annotation_str(UdfData), + 'openeo_udf.api.udf_data.UdfData' # Legacy `openeo_udf` annotation + } + + +def _apply_timeseries_xarray(array: xarray.DataArray, callback: Callable[[Series], Series]) -> xarray.DataArray: + """ + Apply timeseries callback to given xarray data array + along its time dimension (named "t" or "time") + + :param array: array to transform + :param callback: function that transforms a timeseries in another (same size) + :return: transformed array + """ + # Make time dimension the last one, and flatten the rest + # to create a 1D sequence of input time series (also 1D). 
+ [time_position] = [i for (i, d) in enumerate(array.dims) if d in ["t", "time"]] + input_series = numpy.moveaxis(array.values, time_position, -1) + orig_shape = input_series.shape + input_series = input_series.reshape((-1, input_series.shape[-1])) + + applied = numpy.asarray([callback(s) for s in input_series]) + + # Reshape to original shape + applied = applied.reshape(orig_shape) + applied = numpy.moveaxis(applied, -1, time_position) + assert applied.shape == array.shape + + return xarray.DataArray(applied, coords=array.coords, dims=array.dims, name=array.name) + + +def apply_timeseries_generic( + udf_data: UdfData, + callback: Callable[[Series, dict], Series] +) -> UdfData: + """ + Implements the UDF contract by calling a user provided time series transformation function. + + :param udf_data: + :param callback: callable that takes a pandas Series and context dict and returns a pandas Series. + See template :py:func:`openeo.udf.udf_signatures.apply_timeseries` + :return: + """ + callback = functools.partial(callback, context=udf_data.user_context) + datacubes = [ + XarrayDataCube(_apply_timeseries_xarray(array=cube.array, callback=callback)) + for cube in udf_data.get_datacube_list() + ] + # Insert the new tiles as list of raster collection tiles in the input object. The new tiles will + # replace the original input tiles. + udf_data.set_datacube_list(datacubes) + return udf_data + + +def run_udf_code(code: str, data: UdfData) -> UdfData: + # TODO: current implementation uses first match directly, first check for multiple matches? 
+ module = load_module_from_string(code) + functions = ((k, v) for (k, v) in module.items() if callable(v)) + + for (fn_name, func) in functions: + try: + sig = inspect.signature(func) + except ValueError: + continue + params = sig.parameters + first_param = next(iter(params.values()), None) + + if ( + fn_name == 'apply_timeseries' + and 'series' in params and 'context' in params + and _annotation_is_pandas_series(params["series"].annotation) + and _annotation_is_pandas_series(sig.return_annotation) + ): + _log.info("Found timeseries mapping UDF `{n}` {f!r}".format(n=fn_name, f=func)) + return apply_timeseries_generic(data, func) + elif ( + fn_name in ['apply_hypercube', 'apply_datacube'] + and 'cube' in params and 'context' in params + and _annotation_is_udf_datacube(params["cube"].annotation) + and _annotation_is_udf_datacube(sig.return_annotation) + ): + _log.info("Found datacube mapping UDF `{n}` {f!r}".format(n=fn_name, f=func)) + if len(data.get_datacube_list()) != 1: + raise ValueError("The provided UDF expects exactly one datacube, but {c} were provided.".format( + c=len(data.get_datacube_list()) + )) + # TODO: also support calls without user context? + result_cube = func(cube=data.get_datacube_list()[0], context=data.user_context) + data.set_datacube_list([result_cube]) + return data + elif ( + fn_name in ['apply_datacube'] + and 'cube' in params and 'context' in params + and _annotation_is_data_array(params["cube"].annotation) + and _annotation_is_data_array(sig.return_annotation) + ): + _log.info("Found datacube mapping UDF `{n}` {f!r}".format(n=fn_name, f=func)) + if len(data.get_datacube_list()) != 1: + raise ValueError("The provided UDF expects exactly one datacube, but {c} were provided.".format( + c=len(data.get_datacube_list()) + )) + # TODO: also support calls without user context? 
+ result_cube: xarray.DataArray = func(cube=data.get_datacube_list()[0].get_array(), context=data.user_context) + data.set_datacube_list([XarrayDataCube(result_cube)]) + return data + elif ( + fn_name in ["apply_vectorcube"] + and "geometries" in params + and _get_annotation_str(params["geometries"].annotation) == "geopandas.geodataframe.GeoDataFrame" + and "cube" in params + and _annotation_is_data_array(params["cube"].annotation) + ): + if data.get_feature_collection_list is None or data.get_datacube_list() is None: + raise ValueError( + "The provided UDF expects a FeatureCollection and a datacube, but received {f} and {c}".format( + f=data.get_feature_collection_list(), c=data.get_datacube_list() + ) + ) + if len(data.get_feature_collection_list()) != 1: + raise ValueError( + "The provided UDF expects exactly one FeatureCollection, but {c} were provided.".format( + c=len(data.get_feature_collection_list()) + ) + ) + if len(data.get_datacube_list()) != 1: + raise ValueError( + "The provided UDF expects exactly one datacube, but {c} were provided.".format( + c=len(data.get_datacube_list()) + ) + ) + # TODO: geopandas is optional dependency. + input_geoms = data.get_feature_collection_list()[0].data + input_cube = data.get_datacube_list()[0].get_array() + result_geoms, result_cube = func(geometries=input_geoms, cube=input_cube, context=data.user_context) + data.set_datacube_list([XarrayDataCube(result_cube)]) + data.set_feature_collection_list([FeatureCollection(id="udf_result", data=result_geoms)]) + return data + elif len(params) == 1 and _annotation_is_udf_data(first_param.annotation): + _log.info("Found generic UDF `{n}` {f!r}".format(n=fn_name, f=func)) + func(data) + return data + + raise OpenEoUdfException("No UDF found.") + + +def execute_local_udf(udf: Union[str, openeo.UDF], datacube: Union[str, xarray.DataArray, XarrayDataCube], fmt='netcdf'): + """ + Locally executes an user defined function on a previously downloaded datacube. 
+ + :param udf: the code of the user defined function + :param datacube: the path to the downloaded data in disk or a DataCube + :param fmt: format of the file if datacube is string + :return: the resulting DataCube + """ + if isinstance(udf, openeo.UDF): + udf = udf.code + + if isinstance(datacube, (str, pathlib.Path)): + d = XarrayDataCube.from_file(path=datacube, fmt=fmt) + elif isinstance(datacube, XarrayDataCube): + d = datacube + elif isinstance(datacube, xarray.DataArray): + d = XarrayDataCube(datacube) + else: + raise ValueError(datacube) + d_array = d.get_array() + expected_order = ("t", "bands", "y", "x") + dims = [d for d in expected_order if d in d_array.dims] + + # TODO #472: skip going through XarrayDataCube above, we only need xarray.DataArray here anyway. + d = XarrayDataCube( + d_array.transpose(*dims) + # TODO: this float conversion was in original implementation (0962e00e03) but is that actually necessary? + .astype(numpy.float64) + ) + # wrap to udf_data + udf_data = UdfData(datacube_list=[d]) + + # TODO: enrich to other types like time series, vector data,... probalby by adding named arguments + # signature: UdfData(proj, datacube_list, feature_collection_list, structured_data_list, ml_model_list, metadata) + + # run the udf through the same routine as it would have been parsed in the backend + result = run_udf_code(udf, udf_data) + return result + + +def extract_udf_dependencies(udf: Union[str, UDF]) -> Union[List[str], None]: + """ + Extract dependencies from UDF code declared in a top-level comment block + following the `inline script metadata specification (PEP 508) `_. + + Basic example UDF snippet declaring expected dependencies as embedded metadata + in a comment block: + + .. code-block:: python + + # /// script + # dependencies = [ + # "geojson", + # ] + # /// + + import geojson + + def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray: + ... + + .. 
seealso:: :ref:`python-udf-dependency-declaration` for more in-depth information. + + :param udf: UDF code as a string or :py:class:`~openeo.rest._datacube.UDF` object + :return: List of extracted dependencies or ``None`` when no valid metadata block with dependencies was found. + + .. versionadded:: 0.30.0 + """ + udf_code = udf.code if isinstance(udf, UDF) else udf + + # Extract "script" blocks + script_type = "script" + block_regex = re.compile( + r"^# /// (?P[a-zA-Z0-9-]+)\s*$\s(?P(^#(| .*)$\s)+)^# ///$", flags=re.MULTILINE + ) + script_blocks = [ + match.group("content") for match in block_regex.finditer(udf_code) if match.group("type") == script_type + ] + + if len(script_blocks) > 1: + raise ValueError(f"Multiple {script_type!r} blocks found in top-level comment") + elif len(script_blocks) == 0: + return None + + # Extract dependencies from "script" block + content = "".join( + line[2:] if line.startswith("# ") else line[1:] for line in script_blocks[0].splitlines(keepends=True) + ) + + return tomllib.loads(content).get("dependencies") diff --git a/lib/openeo/udf/structured_data.py b/lib/openeo/udf/structured_data.py new file mode 100644 index 000000000..038bb37be --- /dev/null +++ b/lib/openeo/udf/structured_data.py @@ -0,0 +1,47 @@ +""" + +""" + +# Note: this module was initially developed under the ``openeo-udf`` project (https://github.com/Open-EO/openeo-udf) + +from __future__ import annotations + +import builtins +from typing import Union + + +class StructuredData: + """ + This class represents structured data that is produced by an UDF and can not be represented + as a raster or vector data cube. For example: the result of a statistical + computation. 
+ + Usage example:: + + >>> StructuredData([3, 5, 8, 13]) + >>> StructuredData({"mean": 5, "median": 8}) + >>> StructuredData([('col_1', 'col_2'), (1, 2), (2, 3)], type="table") + """ + + def __init__(self, data: Union[list, dict], description: str = None, type: str = None): + self.data = data + self.type = type or builtins.type(data).__name__ + self.description = description or self.type + + def __repr__(self): + return f"<{type(self).__name__} with {self.type}>" + + def to_dict(self) -> dict: + return dict( + data=self.data, + description=self.description, + type=self.type, + ) + + @classmethod + def from_dict(cls, data: dict) -> StructuredData: + return cls( + data=data["data"], + description=data.get("description"), + type=data.get("type") + ) diff --git a/lib/openeo/udf/udf_data.py b/lib/openeo/udf/udf_data.py new file mode 100644 index 000000000..e07ccdf8b --- /dev/null +++ b/lib/openeo/udf/udf_data.py @@ -0,0 +1,135 @@ +""" + +""" + +# Note: this module was initially developed under the ``openeo-udf`` project (https://github.com/Open-EO/openeo-udf) + +from __future__ import annotations + +from typing import List, Optional, Union + +from openeo.udf.feature_collection import FeatureCollection +from openeo.udf.structured_data import StructuredData +from openeo.udf.xarraydatacube import XarrayDataCube + + +class UdfData: + """ + Container for data passed to a user defined function (UDF) + """ + + # TODO: original implementation in `openeo_udf` project had `get_datacube_by_id`, `get_feature_collection_by_id`: is it still useful to provide this? + # TODO: original implementation in `openeo_udf` project had `server_context`: is it still useful to provide this? 
+ + def __init__( + self, + proj: dict = None, + datacube_list: Optional[List[XarrayDataCube]] = None, + feature_collection_list: Optional[List[FeatureCollection]] = None, + structured_data_list: Optional[List[StructuredData]] = None, + user_context: Optional[dict] = None, + ): + """ + The constructor of the UDF argument class that stores all data required by the + user defined function. + + :param proj: A dictionary of form {"proj type string": "projection description"} e.g. {"EPSG": 4326} + :param datacube_list: A list of data cube objects + :param feature_collection_list: A list of VectorTile objects + :param structured_data_list: A list of structured data objects + """ + self.datacube_list = datacube_list + self.feature_collection_list = feature_collection_list + self.structured_data_list = structured_data_list + self.proj = proj + self._user_context = user_context or {} + + def __repr__(self) -> str: + fields = " ".join( + f"{f}:{getattr(self, f)!r}" for f in + ["datacube_list", "feature_collection_list", "structured_data_list"] + ) + return f"<{type(self).__name__} {fields}>" + + @property + def user_context(self) -> dict: + """Return the user context that was passed to the run_udf function""" + return self._user_context + + def get_datacube_list(self) -> Union[List[XarrayDataCube], None]: + """Get the data cube list""" + return self._datacube_list + + def set_datacube_list(self, datacube_list: Union[List[XarrayDataCube], None]): + """ + Set the data cube list + + :param datacube_list: A list of data cubes + """ + self._datacube_list = datacube_list + + datacube_list = property(fget=get_datacube_list, fset=set_datacube_list) + + def get_feature_collection_list(self) -> Union[List[FeatureCollection], None]: + """get all feature collections as list""" + return self._feature_collection_list + + def set_feature_collection_list(self, feature_collection_list: Union[List[FeatureCollection], None]): + self._feature_collection_list = feature_collection_list + + 
feature_collection_list = property(fget=get_feature_collection_list, fset=set_feature_collection_list) + + def get_structured_data_list(self) -> Union[List[StructuredData], None]: + """ + Get all structured data entries + + :return: A list of StructuredData objects + """ + return self._structured_data_list + + def set_structured_data_list(self, structured_data_list: Union[List[StructuredData], None]): + """ + Set the list of structured data + + :param structured_data_list: A list of StructuredData objects + """ + self._structured_data_list = structured_data_list + + structured_data_list = property(fget=get_structured_data_list, fset=set_structured_data_list) + + def to_dict(self) -> dict: + """ + Convert this UdfData object into a dictionary that can be converted into + a valid JSON representation + """ + return { + "datacubes": [x.to_dict() for x in self.datacube_list] \ + if self.datacube_list else None, + "feature_collection_list": [x.to_dict() for x in self.feature_collection_list] \ + if self.feature_collection_list else None, + "structured_data_list": [x.to_dict() for x in self.structured_data_list] \ + if self.structured_data_list else None, + "proj": self.proj, + "user_context": self.user_context, + } + + @classmethod + def from_dict(cls, udf_dict: dict) -> UdfData: + """ + Create a udf data object from a python dictionary that was created from + the JSON definition of the UdfData class + + :param udf_dict: The dictionary that contains the udf data definition + """ + + datacubes = [XarrayDataCube.from_dict(x) for x in udf_dict.get("datacubes", [])] + feature_collection_list = [FeatureCollection.from_dict(x) for x in udf_dict.get("feature_collection_list", [])] + structured_data_list = [StructuredData.from_dict(x) for x in udf_dict.get("structured_data_list", [])] + udf_data = cls( + proj=udf_dict.get("proj"), + datacube_list=datacubes, + feature_collection_list=feature_collection_list, + structured_data_list=structured_data_list, + 
user_context=udf_dict.get("user_context") + ) + return udf_data diff --git a/lib/openeo/udf/udf_signatures.py b/lib/openeo/udf/udf_signatures.py new file mode 100644 index 000000000..7afe36a6a --- /dev/null +++ b/lib/openeo/udf/udf_signatures.py @@ -0,0 +1,109 @@ +""" +This module defines a number of function signatures that can be implemented by UDF's. +Both the name of the function and the argument types are/can be used by the backend to validate if the provided UDF +is compatible with the calling context of the process graph in which it is used. + +""" +# Note: this module was initially developed under the ``openeo-udf`` project (https://github.com/Open-EO/openeo-udf) + +import xarray +from pandas import Series + +from openeo.metadata import CollectionMetadata +from openeo.udf.udf_data import UdfData +from openeo.udf.xarraydatacube import XarrayDataCube + +try: + # Geopandas is an optional dependency, but one of the signatures uses it as type annotation + import geopandas +except ImportError: + pass + + +def apply_timeseries(series: Series, context: dict) -> Series: + """ + Process a timeseries of values, without changing the time instants. + + This can for instance be used for smoothing or gap-filling. + + :param series: A Pandas Series object with a date-time index. + :param context: A dictionary containing user context. + :return: A Pandas Series object with the same datetime index. + """ + # TODO: do we need geospatial coordinates for the series? + return series + + +def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube: + """ + Map a :py:class:`XarrayDataCube` to another :py:class:`XarrayDataCube`. + + Depending on the context in which this function is used, the :py:class:`XarrayDataCube` dimensions + have to be retained or can be chained. + For instance, in the context of a reducing operation along a dimension, + that dimension will have to be reduced to a single value. 
+ In the context of a 1 to 1 mapping operation, all dimensions have to be retained. + + :param cube: input data cube + :param context: A dictionary containing user context. + :return: output data cube + """ + return cube + + +def apply_udf_data(data: UdfData): + """ + Generic UDF function that directly manipulates a :py:class:`UdfData` object + + :param data: :py:class:`UdfData` object to manipulate in-place + """ + pass + + +def apply_metadata(metadata: CollectionMetadata, context: dict) -> CollectionMetadata: + """ + .. warning:: + This signature is not yet fully standardized and subject to change. + + Returns the expected cube metadata, after applying this UDF, based on input metadata. + The provided metadata represents the whole raster or vector cube. This function does not need to be called for every data chunk. + + When this function is not implemented by the UDF, the backend may still be able to infer correct metadata by running the + UDF, but this can result in reduced performance or errors. + + This function does not need to be provided when using the UDF in combination with processes that by design have a clear + effect on cube metadata, such as :py:meth:`~openeo.rest.datacube.DataCube.reduce_dimension()` + + :param metadata: the collection metadata of the input data cube + :param context: A dictionary containing user context. + + :return: output metadata: the expected metadata of the cube, after applying the udf + + Examples + -------- + + An example for a UDF that is applied on the 'bands' dimension, and returns a new set of bands with different labels. + + >>> def apply_metadata(metadata: CollectionMetadata, context: dict) -> CollectionMetadata: + ... return metadata.rename_labels( + ... dimension="bands", + ... target=["computed_band_1", "computed_band_2"] + ... 
) + + """ + pass + + +def apply_vectorcube( + geometries: "geopandas.geodataframe.GeoDataFrame", cube: xarray.DataArray, context: dict +) -> ("geopandas.geodataframe.GeoDataFrame", xarray.DataArray): + """ + Map a vector cube to another vector cube. + + :param geometries: input geometries as a geopandas.GeoDataFrame. This contains the actual shapely geometries and optional properties. + :param cube: a data cube with dimensions (geometries, time, bands) where time and bands are optional. + The coordinates for the geometry dimension are integers and match the index of the geometries in the geometries parameter. + :param context: A dictionary containing user context. + :return: output geometries, output data cube + """ + pass diff --git a/lib/openeo/udf/xarraydatacube.py b/lib/openeo/udf/xarraydatacube.py new file mode 100644 index 000000000..05dd5311d --- /dev/null +++ b/lib/openeo/udf/xarraydatacube.py @@ -0,0 +1,381 @@ +""" + +""" + +# Note: this module was initially developed under the ``openeo-udf`` project (https://github.com/Open-EO/openeo-udf) + +from __future__ import annotations + +import collections +import json +import typing +from pathlib import Path +from typing import Optional, Union + +import numpy +import xarray + +from openeo.udf import OpenEoUdfException +from openeo.util import deep_get, dict_no_none + +if typing.TYPE_CHECKING: + # Imports for type checking only (circular import issue at runtime). + import matplotlib.colors + + +class XarrayDataCube: + """ + This is a thin wrapper around :py:class:`xarray.DataArray` + providing a basic "DataCube" interface for openEO UDF usage around multi-dimensional data. + """ + + # TODO #472 This class, just wrapping an array.DataArray, seems to make things more complicated/confusing than necessary. 
+ + def __init__(self, array: xarray.DataArray): + if not isinstance(array, xarray.DataArray): + raise OpenEoUdfException("Argument data must be of type xarray.DataArray") + self._array = array + + def __repr__(self): + return f"<{type(self).__name__} shape:{self._array.shape}>" + + def get_array(self) -> xarray.DataArray: + """ + Get the :py:class:`xarray.DataArray` that contains the data and dimension definition + """ + return self._array + + array = property(fget=get_array) + + @property + def id(self): + return self._array.name + + def to_dict(self) -> dict: + """ + Convert this hypercube into a dictionary that can be converted into + a valid JSON representation + + >>> example = { + ... "id": "test_data", + ... "data": [ + ... [[0.0, 0.1], [0.2, 0.3]], + ... [[0.0, 0.1], [0.2, 0.3]], + ... ], + ... "dimension": [ + ... {"name": "time", "coordinates": ["2001-01-01", "2001-01-02"]}, + ... {"name": "X", "coordinates": [50.0, 60.0]}, + ... {"name": "Y"}, + ... ], + ... } + """ + xd = self._array.to_dict() + return dict_no_none({ + "id": xd.get("name"), + "data": xd.get("data"), + "description": deep_get(xd, "attrs", "description", default=None), + "dimensions": [ + dict_no_none( + name=dim, + coordinates=deep_get(xd, "coords", dim, "data", default=None) + ) + for dim in xd.get("dims", []) + ] + }) + + @classmethod + def from_dict(cls, xdc_dict: dict) -> XarrayDataCube: + """ + Create a :py:class:`XarrayDataCube` from a Python dictionary that was created from + the JSON definition of the data cube + + :param data: The dictionary that contains the data cube definition + """ + + if "data" not in xdc_dict: + raise OpenEoUdfException("Missing data in dictionary") + + data = numpy.asarray(xdc_dict["data"]) + + if "dimensions" in xdc_dict: + dims = [dim["name"] for dim in xdc_dict["dimensions"]] + coords = {dim["name"]: dim["coordinates"] for dim in xdc_dict["dimensions"] if "coordinates" in dim} + else: + dims = None + coords = None + + x = xarray.DataArray(data, 
dims=dims, coords=coords, name=xdc_dict.get("id")) + + if "description" in xdc_dict: + x.attrs["description"] = xdc_dict["description"] + + return cls(array=x) + + @staticmethod + def _guess_format(path: Union[str, Path]) -> str: + """Guess file format from file name.""" + suffix = Path(path).suffix.lower() + if suffix in [".nc", ".netcdf"]: + return "netcdf" + elif suffix in [".json"]: + return "json" + else: + raise ValueError("Can not guess format of {p}".format(p=path)) + + @classmethod + def from_file(cls, path: Union[str, Path], fmt=None, **kwargs) -> XarrayDataCube: + """ + Load data file as :py:class:`XarrayDataCube` in memory + + :param path: the file on disk + :param fmt: format to load from, e.g. "netcdf" or "json" + (will be auto-detected when not specified) + + :return: loaded data cube + """ + fmt = fmt or cls._guess_format(path) + if fmt.lower() == 'netcdf': + return cls(array=XarrayIO.from_netcdf_file(path=path, **kwargs)) + elif fmt.lower() == 'json': + return cls(array=XarrayIO.from_json_file(path=path)) + else: + raise ValueError("invalid format {f}".format(f=fmt)) + + def save_to_file(self, path: Union[str, Path], fmt=None, **kwargs): + """ + Store :py:class:`XarrayDataCube` to file + + :param path: destination file on disk + :param fmt: format to save as, e.g. 
"netcdf" or "json" + (will be auto-detected when not specified) + """ + fmt = fmt or self._guess_format(path) + if fmt.lower() == 'netcdf': + XarrayIO.to_netcdf_file(array=self.get_array(), path=path, **kwargs) + elif fmt.lower() == 'json': + XarrayIO.to_json_file(array=self.get_array(), path=path) + else: + raise ValueError(fmt) + + def plot( + self, + title: str = None, + limits=None, + show_bandnames: bool = True, + show_dates: bool = True, + show_axeslabels: bool = False, + fontsize: float = 10., + oversample: float = 1, + cmap: Union[str, 'matplotlib.colors.Colormap'] = 'RdYlBu_r', + cbartext: str = None, + to_file: str = None, + to_show: bool = True + ): + """ + Visualize a :py:class:`XarrayDataCube` with matplotlib + + :param datacube: data to plot + :param title: title text drawn in the top left corner (default: nothing) + :param limits: range of the contour plot as a tuple(min,max) (default: None, in which case the min/max is computed from the data) + :param show_bandnames: whether to plot the column names (default: True) + :param show_dates: whether to show the dates for each row (default: True) + :param show_axeslabels: whether to show the labels on the axes (default: False) + :param fontsize: font size in pixels (default: 10) + :param oversample: one value is plotted into oversample x oversample number of pixels (default: 1 which means each value is plotted as a single pixel) + :param cmap: built-in matplotlib color map name or ColorMap object (default: RdYlBu_r which is a blue-yellow-red rainbow) + :param cbartext: text on top of the legend (default: nothing) + :param to_file: filename to save the image to (default: None, which means no file is generated) + :param to_show: whether to show the image in a matplotlib window (default: True) + + :return: None + """ + from matplotlib import pyplot + + data = self.get_array() + if limits is None: + vmin = data.min() + vmax = data.max() + else: + vmin = limits[0] + vmax = limits[1] + + # fill bands and t if 
missing + if 'bands' not in data.dims: + data = data.expand_dims(dim={'bands': ['band0']}) + if 't' not in data.dims: + data = data.expand_dims(dim={'t': [numpy.datetime64('today')]}) + if 'bands' not in data.coords: + data['bands'] = ['band0'] + if 't' not in data.coords: + data['t'] = [numpy.datetime64('today')] + + # align with plot + data = data.transpose('t', 'bands', 'y', 'x') + dpi = 100 + xres = len(data.x) / dpi + yres = len(data.y) / dpi + fs = fontsize / oversample + frame = 0.33 + + nrow = data.shape[0] + ncol = data.shape[1] + + fig = pyplot.figure(figsize=((ncol + frame) * xres * 1.1, (nrow + frame) * yres), dpi=int(dpi * oversample)) + gs = pyplot.GridSpec(nrow, ncol, wspace=0., hspace=0., top=nrow / (nrow + frame), bottom=0., + left=frame / (ncol + frame), right=1.) + + xmin = data.x.min() + xmax = data.x.max() + ymin = data.y.min() + ymax = data.y.max() + + # flip around if incorrect, this is in harmony with origin='lower' + if (data.x[0] > data.x[-1]): + data = data.reindex(x=list(reversed(data.x))) + if (data.y[0] > data.y[-1]): + data = data.reindex(y=list(reversed(data.y))) + + extent = (data.x[0], data.x[-1], data.y[0], data.y[-1]) + + for i in range(nrow): + for j in range(ncol): + im = data[i, j] + ax = pyplot.subplot(gs[i, j]) + ax.set_xlim(xmin, xmax) + ax.set_ylim(ymin, ymax) + img = ax.imshow(im, vmin=vmin, vmax=vmax, cmap=cmap, origin='lower', extent=extent) + ax.xaxis.set_tick_params(labelsize=fs) + ax.yaxis.set_tick_params(labelsize=fs) + if not show_axeslabels: + ax.set_axis_off() + ax.set_xticklabels([]) + ax.set_yticklabels([]) + if show_bandnames: + if i == 0: ax.text(0.5, 1.08, data.bands.values[j] + " (" + str(data.dtype) + ")", size=fs, + va="center", + ha="center", transform=ax.transAxes) + if show_dates: + if j == 0: ax.text(-0.08, 0.5, data.t.dt.strftime("%Y-%m-%d").values[i], size=fs, va="center", + ha="center", rotation=90, transform=ax.transAxes) + + if title is not None: + fig.text(0., 1., title.split('/')[-1], size=fs, 
va="top", ha="left", weight='bold') + + cbar_ax = fig.add_axes([0.01, 0.1, 0.04, 0.5]) + if cbartext is not None: + fig.text(0.06, 0.62, cbartext, size=fs, va="bottom", ha="center") + cbar = fig.colorbar(img, cax=cbar_ax) + cbar.ax.tick_params(labelsize=fs) + cbar.outline.set_visible(False) + cbar.ax.tick_params(size=0) + cbar.ax.yaxis.set_tick_params(pad=0) + + if to_file is not None: + pyplot.savefig(str(to_file)) + if to_show: + pyplot.show() + + pyplot.close() + + +class XarrayIO: + """ + Helpers to load/store :py:cass:`xarray.DataArray` objects, + with some conventions about expected dimensions/bands + """ + + @classmethod + def from_json_file(cls, path: Union[str, Path]) -> xarray.DataArray: + with Path(path).open() as f: + return cls.from_json(json.load(f)) + + @classmethod + def from_json(cls, d: dict) -> xarray.DataArray: + d['data'] = numpy.array(d['data'], dtype=numpy.dtype(d['attrs']['dtype'])) + for k, v in d['coords'].items(): + # prepare coordinate + d['coords'][k]['data'] = numpy.array(v['data'], dtype=v['attrs']['dtype']) + # remove dtype and shape, because that is included for helping the user + if d['coords'][k].get('attrs', None) is not None: + d['coords'][k]['attrs'].pop('dtype', None) + d['coords'][k]['attrs'].pop('shape', None) + + # remove dtype and shape, because that is included for helping the user + if d.get('attrs', None) is not None: + d['attrs'].pop('dtype', None) + d['attrs'].pop('shape', None) + # convert to xarray + r = xarray.DataArray.from_dict(d) + + # build dimension list in proper order + dims = list(filter(lambda i: i != 't' and i != 'bands' and i != 'x' and i != 'y', r.dims)) + if 't' in r.dims: dims += ['t'] + if 'bands' in r.dims: dims += ['bands'] + if 'x' in r.dims: dims += ['x'] + if 'y' in r.dims: dims += ['y'] + # return the resulting data array + return r.transpose(*dims) + + @classmethod + def from_netcdf_file(cls, path: Union[str, Path], engine: Optional[str] = None) -> xarray.DataArray: + # load the dataset and 
convert to data array + ds = xarray.open_dataset(path, engine=engine) + + # Skip non-numerical variables (like "crs") + band_vars = [k for k, v in ds.data_vars.items() if v.dtype.kind in {"b", "i", "u", "f"} and len(v.dims) > 0] + ds = ds[band_vars] + + r = ds.to_array(dim='bands') + + # Reorder dims to proper order (t-bands-x-y at the end) + expected_order = ("t", "bands", "x", "y") + dims = [d for d in r.dims if d not in expected_order] + [d for d in expected_order if d in r.dims] + + return r.transpose(*dims) + + @classmethod + def to_json_file(cls, array: xarray.DataArray, path: Union[str, Path]): + # to deserialized json + jsonarray = array.to_dict() + # add attributes that needed for re-creating xarray from json + jsonarray['attrs']['dtype'] = str(array.values.dtype) + jsonarray['attrs']['shape'] = list(array.values.shape) + for i in array.coords.values(): + jsonarray['coords'][i.name]['attrs']['dtype'] = str(i.dtype) + jsonarray['coords'][i.name]['attrs']['shape'] = list(i.shape) + # custom print so resulting json file is humanly easy to read + # TODO: make this human friendly JSON format optional and allow compact JSON too. 
+ with Path(path).open("w", encoding="utf-8") as f: + def custom_print(data_structure, indent=1): + f.write("{\n") + needs_comma = False + for key, value in data_structure.items(): + if needs_comma: + f.write(',\n') + needs_comma = True + f.write(' ' * indent + json.dumps(key) + ':') + if isinstance(value, dict): + custom_print(value, indent + 1) + else: + json.dump(value, f, default=str, separators=(',', ':')) + f.write('\n' + ' ' * (indent - 1) + "}") + + custom_print(jsonarray) + + @classmethod + def to_netcdf_file(cls, array: xarray.DataArray, path: Union[str, Path], engine: Optional[str] = None): + # temp reference to avoid modifying the original array + result = array + # rearrange in a basic way because older xarray versions have a bug and ellipsis don't work in xarray.transpose() + if result.dims[-2] == 'x' and result.dims[-1] == 'y': + l = list(result.dims[:-2]) + result = result.transpose(*(l + ['y', 'x'])) + # turn it into a dataset where each band becomes a variable + if not 'bands' in result.dims: + result = result.expand_dims(dim=collections.OrderedDict({'bands': ['band_0']})) + else: + if not 'bands' in result.coords: + labels = ['band_' + str(i) for i in range(result.shape[result.dims.index('bands')])] + result = result.assign_coords(bands=labels) + result = result.to_dataset('bands') + result.to_netcdf(path, engine=engine) diff --git a/lib/openeo/util.py b/lib/openeo/util.py new file mode 100644 index 000000000..44842124a --- /dev/null +++ b/lib/openeo/util.py @@ -0,0 +1,689 @@ +""" +Various utilities and helpers. 
+""" + +# TODO #465 split this kitchen-sink in thematic submodules + +from __future__ import annotations + +import datetime as dt +import functools +import json +import logging +import re +import sys +import time +from collections import OrderedDict +from enum import Enum +from pathlib import Path +from typing import Any, Callable, List, Optional, Tuple, Union +from urllib.parse import urljoin + +import requests +import shapely.geometry.base +from deprecated import deprecated + +try: + # pyproj is an optional dependency + import pyproj +except ImportError: + pyproj = None + + +logger = logging.getLogger(__name__) + + +class Rfc3339: + """ + Formatter for dates according to RFC-3339. + + Parses date(time)-like input and formats according to RFC-3339. Some examples: + + >>> rfc3339.date("2020:03:17") + "2020-03-17" + >>> rfc3339.date(2020, 3, 17) + "2020-03-17" + >>> rfc3339.datetime("2020/03/17/12/34/56") + "2020-03-17T12:34:56Z" + >>> rfc3339.datetime([2020, 3, 17, 12, 34, 56]) + "2020-03-17T12:34:56Z" + >>> rfc3339.datetime(2020, 3, 17) + "2020-03-17T00:00:00Z" + >>> rfc3339.datetime(datetime(2020, 3, 17, 12, 34, 56)) + "2020-03-17T12:34:56Z" + + Or just normalize (automatically preserve date/datetime resolution): + + >>> rfc3339.normalize("2020/03/17") + "2020-03-17" + >>> rfc3339.normalize("2020-03-17-12-34-56") + "2020-03-17T12:34:56Z" + + Also see https://tools.ietf.org/html/rfc3339#section-5.6 + """ + # TODO: currently we hard code timezone 'Z' for simplicity. Add real time zone support? + _FMT_DATE = '%Y-%m-%d' + _FMT_TIME = '%H:%M:%SZ' + _FMT_DATETIME = _FMT_DATE + "T" + _FMT_TIME + + _regex_datetime = re.compile(r""" + ^(?P\d{4})[:/_-](?P\d{2})[:/_-](?P\d{2})[T :/_-]? + (?:(?P\d{2})[:/_-](?P\d{2})(?:[:/_-](?P\d{2}))?)?""", re.VERBOSE) + + def __init__(self, propagate_none: bool = False): + self._propagate_none = propagate_none + + def datetime(self, x: Any, *args) -> Union[str, None]: + """ + Format given date(time)-like object as RFC-3339 datetime string. 
+ """ + if args: + return self.datetime((x,) + args) + elif isinstance(x, dt.datetime): + return self._format_datetime(x) + elif isinstance(x, dt.date): + return self._format_datetime(dt.datetime.combine(x, dt.time())) + elif isinstance(x, str): + return self._format_datetime(dt.datetime(*self._parse_datetime(x))) + elif isinstance(x, (tuple, list)): + return self._format_datetime(dt.datetime(*(int(v) for v in x))) + elif x is None and self._propagate_none: + return None + raise ValueError(x) + + def date(self, x: Any, *args) -> Union[str, None]: + """ + Format given date-like object as RFC-3339 date string. + """ + if args: + return self.date((x,) + args) + elif isinstance(x, (dt.date, dt.datetime)): + return self._format_date(x) + elif isinstance(x, str): + return self._format_date(dt.datetime(*self._parse_datetime(x))) + elif isinstance(x, (tuple, list)): + return self._format_date(dt.datetime(*(int(v) for v in x))) + elif x is None and self._propagate_none: + return None + raise ValueError(x) + + def normalize(self, x: Any, *args) -> Union[str, None]: + """ + Format given date(time)-like object as RFC-3339 date or date-time string depending on given resolution + + >>> rfc3339.normalize("2020/03/17") + "2020-03-17" + >>> rfc3339.normalize("2020/03/17/12/34/56") + "2020-03-17T12:34:56Z" + """ + if args: + return self.normalize((x,) + args) + elif isinstance(x, dt.datetime): + return self.datetime(x) + elif isinstance(x, dt.date): + return self.date(x) + elif isinstance(x, str): + x = self._parse_datetime(x) + return self.date(x) if len(x) <= 3 else self.datetime(x) + elif isinstance(x, (tuple, list)): + return self.date(x) if len(x) <= 3 else self.datetime(x) + elif x is None and self._propagate_none: + return None + raise ValueError(x) + + def parse_date(self, x: Union[str, None]) -> Union[dt.date, None]: + """Parse given string as RFC3339 date.""" + if isinstance(x, str): + return dt.datetime.strptime(x, "%Y-%m-%d").date() + elif x is None and 
self._propagate_none: + return None + raise ValueError(x) + + def parse_datetime( + self, x: Union[str, None], with_timezone: bool = False + ) -> Union[dt.datetime, None]: + """Parse given string as RFC3339 date-time.""" + if isinstance(x, str): + # TODO: Also support parsing other timezones than UTC (Z) + if re.search(r":\d+\.\d+", x): + res = dt.datetime.strptime(x, "%Y-%m-%dT%H:%M:%S.%fZ") + else: + res = dt.datetime.strptime(x, "%Y-%m-%dT%H:%M:%SZ") + if with_timezone: + res = res.replace(tzinfo=dt.timezone.utc) + return res + elif x is None and self._propagate_none: + return None + raise ValueError(x) + + def parse_date_or_datetime( + self, x: Union[str, None], with_timezone: bool = False + ) -> Union[dt.date, dt.datetime, None]: + """Parse given string as RFC3339 date or date-time.""" + if isinstance(x, str): + if len(x) > 10: + return self.parse_datetime(x, with_timezone=with_timezone) + else: + return self.parse_date(x) + elif x is None and self._propagate_none: + return None + raise ValueError(x) + + @classmethod + def _format_datetime(cls, d: dt.datetime) -> str: + """Format given datetime as RFC-3339 date-time string.""" + if not (d.tzinfo is None or d.tzinfo.tzname(d) == "UTC"): + # TODO: add support for non-UTC timezones? 
+ raise ValueError(f"No support for non-UTC timezone {d.tzinfo}") + return d.strftime(cls._FMT_DATETIME) + + @classmethod + def _format_date(cls, d: dt.date) -> str: + """Format given date as RFC-3339 date string.""" + return d.strftime(cls._FMT_DATE) + + @classmethod + def _parse_datetime(cls, s: str) -> Tuple[int]: + """Try to parse string to a date(time) tuple""" + try: + return tuple(int(v) for v in cls._regex_datetime.match(s).groups() if v is not None) + except Exception: + raise ValueError("Can not parse as date: {s}".format(s=s)) + + def today(self) -> str: + """Today (date) in RFC3339 format""" + return self.date(dt.date.today()) + + def utcnow(self) -> str: + """Current UTC datetime in RFC3339 format.""" + # Current time in UTC timezone (instead of naive `datetime.datetime.utcnow()`, per `datetime` documentation) + now = dt.datetime.now(tz=dt.timezone.utc) + return self.datetime(now) + + +# Default RFC3339 date-time formatter +rfc3339 = Rfc3339() + + +@deprecated("Use `rfc3339.normalize`, `rfc3339.date` or `rfc3339.datetime` instead") +def date_to_rfc3339(d: Any) -> str: + """ + Convert date-like object to a RFC 3339 formatted date string + + see https://tools.ietf.org/html/rfc3339#section-5.6 + """ + return rfc3339.normalize(d) + + +def dict_no_none(*args, **kwargs) -> dict: + """ + Helper to build a dict containing given key-value pairs where the value is not None. 
+ """ + return { + k: v + for k, v in dict(*args, **kwargs).items() + if v is not None + } + + +def first_not_none(*args): + """Return first item from given arguments that is not None.""" + for item in args: + if item is not None: + return item + raise ValueError("No not-None values given.") + + +def ensure_dir(path: Union[str, Path]) -> Path: + """Create directory if it doesn't exist.""" + path = Path(path) + if not path.exists(): + path.mkdir(parents=True, exist_ok=True) + assert path.is_dir() + return path + + +def ensure_list(x): + """Convert given data structure to a list.""" + try: + return list(x) + except TypeError: + return [x] + + +class ContextTimer: + """ + Context manager to measure the "wall clock" time (in seconds) inside/for a block of code. + + Usage example: + + with ContextTimer() as timer: + # Inside code block: currently elapsed time + print(timer.elapsed()) + + # Outside code block: elapsed time when block ended + print(timer.elapsed()) + + """ + + __slots__ = ["start", "end"] + + # Function that returns current time in seconds (overridable for unit tests) + _clock = time.time + + def __init__(self): + self.start = None + self.end = None + + def elapsed(self) -> float: + """Elapsed time (in seconds) inside or at the end of wrapped context.""" + if self.start is None: + raise RuntimeError("Timer not started.") + if self.end is not None: + # Elapsed time when exiting context. + return self.end - self.start + else: + # Currently elapsed inside context. + return self._clock() - self.start + + def __enter__(self) -> ContextTimer: + self.start = self._clock() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.end = self._clock() + + +class TimingLogger: + """ + Context manager for quick and easy logging of start time, end time and elapsed time of some block of code + + Usage example: + + >>> with TimingLogger("Doing batch job"): + ... 
do_batch_job() + + At start of the code block the current time will be logged + and at end of the code block the end time and elapsed time will be logged. + + Can also be used as a function/method decorator, for example: + + >>> @TimingLogger("Calculation going on") + ... def add(x, y): + ... return x + y + """ + + # Function that returns current datetime (overridable for unit tests) + _now = dt.datetime.now + + def __init__(self, title: str = "Timing", logger: Union[logging.Logger, str, Callable] = logger): + """ + :param title: the title to use in the logging + :param logger: how the timing should be logged. + Can be specified as a logging.Logger object (in which case the INFO log level will be used), + as a string (name of the logging.Logger object to construct), + or as callable (e.g. to use the `print` function, or the `.debug` method of an existing logger) + """ + self.title = title + if isinstance(logger, str): + logger = logging.getLogger(logger) + if isinstance(logger, (logging.Logger, logging.LoggerAdapter)): + self._log = logger.info + elif callable(logger): + self._log = logger + else: + raise ValueError("Invalid logger {l!r}".format(l=logger)) + + self.start_time = self.end_time = self.elapsed = None + + def __enter__(self): + self.start_time = self._now() + self._log("{t}: start {s}".format(t=self.title, s=self.start_time)) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.end_time = self._now() + self.elapsed = self.end_time - self.start_time + self._log("{t}: {s} {e}, elapsed {d}".format( + t=self.title, + s="fail" if exc_type else "end", + e=self.end_time, d=self.elapsed + )) + + def __call__(self, f: Callable): + """ + Use TimingLogger as function/method decorator + """ + + @functools.wraps(f) + def wrapper(*args, **kwargs): + with self: + return f(*args, **kwargs) + + return wrapper + + +class DeepKeyError(LookupError): + def __init__(self, key, keys): + super(DeepKeyError, self).__init__("{k!r} (from deep key 
{s!r})".format(k=key, s=keys)) + + +# Sentinel object for `default` argument of `deep_get` +_deep_get_default_undefined = object() + + +def deep_get(data: dict, *keys, default=_deep_get_default_undefined): + """ + Get value deeply from nested dictionaries/lists/tuples + + :param data: nested data structure of dicts, lists, tuples + :param keys: sequence of keys/indexes to traverse + :param default: default value when a key is missing. + By default a DeepKeyError will be raised. + :return: + """ + for key in keys: + if isinstance(data, dict) and key in data: + data = data[key] + elif isinstance(data, (list, tuple)) and isinstance(key, int) and 0 <= key < len(data): + data = data[key] + else: + if default is _deep_get_default_undefined: + raise DeepKeyError(key, keys) + else: + return default + return data + + +def deep_set(data: dict, *keys, value): + """ + Set a value deeply in nested dictionary + + :param data: nested data structure of dicts, lists, tuples + :param keys: sequence of keys/indexes to traverse + :param value: value to set + """ + if len(keys) == 1: + data[keys[0]] = value + elif len(keys) > 1: + if isinstance(data, dict): + deep_set(data.setdefault(keys[0], OrderedDict()), *keys[1:], value=value) + elif isinstance(data, (list, tuple)): + deep_set(data[keys[0]], *keys[1:], value=value) + else: + ValueError(data) + else: + raise ValueError("No keys given") + + +def guess_format(filename: Union[str, Path]) -> Union[str, None]: + """ + Guess the output format from a given filename and return the corrected format. + Any names not in the dict get passed through. 
+ """ + extension = Path(filename).suffix + if not extension: + return None + extension = extension[1:].lower() + + format_map = { + "gtiff": "GTiff", + "geotiff": "GTiff", + "geotif": "GTiff", + "tiff": "GTiff", + "tif": "GTiff", + "nc": "netCDF", + "netcdf": "netCDF", + "geojson": "GeoJSON", + } + + return format_map.get(extension, extension.upper()) + + +def load_json(path: Union[Path, str]) -> dict: + with Path(path).open("r", encoding="utf-8") as f: + return json.load(f) + + +def load_json_resource(src: Union[str, Path]) -> dict: + """ + Helper to load some kind of JSON resource + + :param src: a JSON resource: a raw JSON string, + a path to (local) JSON file, or a URL to a remote JSON resource + :return: data structured parsed from JSON + """ + if isinstance(src, str) and src.strip().startswith("{"): + # Assume source is a raw JSON string + return json.loads(src) + elif isinstance(src, str) and re.match(r"^https?://", src, flags=re.I): + # URL to remote JSON resource + return requests.get(src).json() + elif isinstance(src, Path) or (isinstance(src, str) and src.endswith(".json")): + # Assume source is a local JSON file path + return load_json(src) + raise ValueError(src) + + +class LazyLoadCache: + """Simple cache that allows to (lazy) load on cache miss.""" + + def __init__(self): + self._cache = {} + + def get(self, key: Union[str, tuple], load: Callable[[], Any]): + if key not in self._cache: + self._cache[key] = load() + return self._cache[key] + + +def str_truncate(text: str, width: int = 64, ellipsis: str = "...") -> str: + """Shorten a string (with an ellipsis) if it is longer than certain length.""" + width = max(0, int(width)) + if len(text) <= width: + return text + if len(ellipsis) > width: + ellipsis = ellipsis[:width] + return text[:max(0, (width - len(ellipsis)))] + ellipsis + + +def repr_truncate(obj: Any, width: int = 64, ellipsis: str = "...") -> str: + """Do `repr` rendering of an object, but truncate string if it is too long .""" + if 
isinstance(obj, str) and width > len(ellipsis) + 2: + # Special case: put ellipsis inside quotes + return repr(str_truncate(text=obj, width=width - 2, ellipsis=ellipsis)) + else: + # General case: just put ellipsis at end + return str_truncate(text=repr(obj), width=width, ellipsis=ellipsis) + + +def in_interactive_mode() -> bool: + """Detect if we are running in interactive mode (Jupyter/IPython/repl)""" + # Based on https://stackoverflow.com/a/64523765 + return hasattr(sys, "ps1") + + +class InvalidBBoxException(ValueError): + pass + + +class BBoxDict(dict): + """ + Dictionary based helper to easily create/work with bounding box dictionaries + (having keys "west", "south", "east", "north", and optionally "crs"). + + :param crs: value describing the coordinate reference system. + Typically just an int (interpreted as EPSG code, e.g. ``4326``) + or a string (handled as authority string, e.g. ``"EPSG:4326"``). + See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument. + + .. versionadded:: 0.10.1 + """ + + def __init__(self, *, west: float, south: float, east: float, north: float, crs: Optional[Union[str, int]] = None): + super().__init__(west=west, south=south, east=east, north=north) + if crs is not None: + self.update(crs=normalize_crs(crs)) + + # TODO: provide west, south, east, north, crs as @properties? Read-only or read-write? + + @classmethod + def from_any(cls, x: Any, *, crs: Optional[str] = None) -> BBoxDict: + if isinstance(x, dict): + if crs and "crs" in x and crs != x["crs"]: + raise InvalidBBoxException(f"Two CRS values specified: {crs} and {x['crs']}") + return cls.from_dict({"crs": crs, **x}) + elif isinstance(x, (list, tuple)): + return cls.from_sequence(x, crs=crs) + elif isinstance(x, shapely.geometry.base.BaseGeometry): + return cls.from_sequence(x.bounds, crs=crs) + # TODO: support other input? E.g.: WKT string, GeoJson-style dictionary (Polygon, FeatureCollection, ...) 
+ else: + raise InvalidBBoxException(f"Can not construct BBoxDict from {x!r}") + + @classmethod + def from_dict(cls, data: dict) -> BBoxDict: + """Build from dictionary with at least keys "west", "south", "east", and "north".""" + expected_fields = {"west", "south", "east", "north"} + # TODO: also support upper case fields? + # TODO: optional support for parameterized bbox fields? + missing = expected_fields.difference(data.keys()) + if missing: + raise InvalidBBoxException(f"Missing bbox fields {sorted(missing)}") + invalid = {k: data[k] for k in expected_fields if not isinstance(data[k], (int, float))} + if invalid: + raise InvalidBBoxException(f"Non-numerical bbox fields {invalid}.") + return cls(west=data["west"], south=data["south"], east=data["east"], north=data["north"], crs=data.get("crs")) + + @classmethod + def from_sequence(cls, seq: Union[list, tuple], crs: Optional[str] = None) -> BBoxDict: + """Build from sequence of 4 bounds (west, south, east and north).""" + if len(seq) != 4: + raise InvalidBBoxException(f"Expected sequence with 4 items, but got {len(seq)}.") + return cls(west=seq[0], south=seq[1], east=seq[2], north=seq[3], crs=crs) + + +def to_bbox_dict(x: Any, *, crs: Optional[Union[str, int]] = None) -> BBoxDict: + """ + Convert given data or object to a bounding box dictionary + (having keys "west", "south", "east", "north", and optionally "crs"). + + Supports various input types/formats: + + - list/tuple (assumed to be in west-south-east-north order) + + >>> to_bbox_dict([3, 50, 4, 51]) + {'west': 3, 'south': 50, 'east': 4, 'north': 51} + + - dictionary (unnecessary items will be stripped) + + >>> to_bbox_dict({ + ... "color": "red", "shape": "triangle", + ... "west": 1, "south": 2, "east": 3, "north": 4, "crs": "EPSG:4326", + ... }) + {'west': 1, 'south': 2, 'east': 3, 'north': 4, 'crs': 'EPSG:4326'} + + - a shapely geometry + + .. versionadded:: 0.10.1 + + :param x: input data that describes west-south-east-north bounds in some way, e.g. 
as a dictionary, + a list, a tuple, a shapely geometry, ... + :param crs: (optional) CRS field + :return: dictionary (subclass) with keys "west", "south", "east", "north", and optionally "crs". + """ + return BBoxDict.from_any(x=x, crs=crs) + + +def url_join(root_url: str, path: str): + """Join a base url and sub path properly.""" + return urljoin(root_url.rstrip("/") + "/", path.lstrip("/")) + + +def clip(x: float, min: float, max: float) -> float: + """Clip given value between minimum and maximum value""" + return min if x < min else (x if x < max else max) + + +class SimpleProgressBar: + """Simple ASCII-based progress bar helper.""" + + __slots__ = ["width", "bar", "fill", "left", "right"] + + def __init__(self, width: int = 40, *, bar: str = "#", fill: str = "-", left: str = "[", right: str = "]"): + self.width = int(width) + self.bar = bar[0] + self.fill = fill[0] + self.left = left + self.right = right + + def get(self, fraction: float) -> str: + width = self.width - len(self.left) - len(self.right) + bar = self.bar * int(round(width * clip(fraction, min=0, max=1))) + return f"{self.left}{bar:{self.fill}<{width}s}{self.right}" + + +def normalize_crs(crs: Any, *, use_pyproj: bool = True) -> Union[None, int, str]: + """ + Normalize the given value (describing a CRS or Coordinate Reference System) + to an openEO compatible EPSG code (int) or WKT2 CRS string. + + At minimum, the following input values are handled: + + - an integer value (e.g. ``4326``) is interpreted as an EPSG code + - a string that just contains an integer (e.g. ``"4326"``) + or with an additional ``"EPSG:"`` prefix (e.g. ``"EPSG:4326"``) + will also be interpreted as an EPSG value + + Additional support and behavior depends on the availability of the ``pyproj`` library: + + - When available, it will be used for parsing and validation: + everything supported by `pyproj.CRS.from_user_input <https://pyproj4.github.io/pyproj/stable/api/crs/crs.html#pyproj.crs.CRS.from_user_input>`_ is allowed. + See the ``pyproj`` docs for more details. 
+ - Otherwise, some best effort validation is done: + EPSG looking integer or string values will be parsed as such as discussed above. + Other strings will be assumed to be WKT2 already. + Other data structures will not be accepted. + + :param crs: value that encodes a coordinate reference system, typically just an int (EPSG code) or string (authority string). + If the ``pyproj`` library is available, everything supported by it is allowed. + + :param use_pyproj: whether ``pyproj`` should be leveraged at all + (mainly useful for testing the "no pyproj available" code path) + + :return: EPSG code as int, or WKT2 string. Or None if input was empty. + + :raises ValueError: + When the given CRS data can not be parsed/converted/normalized. + + """ + if crs in (None, "", {}): + return None + + if pyproj and use_pyproj: + try: + # (if available:) let pyproj do the validation/parsing + crs_obj = pyproj.CRS.from_user_input(crs) + # Convert back to EPSG int or WKT2 string + crs = crs_obj.to_epsg() or crs_obj.to_wkt() + except pyproj.ProjError as e: + raise ValueError(f"Failed to normalize CRS data with pyproj: {crs!r}") from e + else: + # Best effort simple validation/normalization + if isinstance(crs, int) and crs > 0: + # Assume int is already valid EPSG code + pass + elif isinstance(crs, str): + # Parse as EPSG int code if it looks like that, + # otherwise: leave it as-is, assuming it is a valid WKT2 CRS string + if re.match(r"^(epsg:)?\d+$", crs.strip(), flags=re.IGNORECASE): + crs = int(crs.split(":")[-1]) + elif "GEOGCRS[" in crs: + # Very simple WKT2 CRS detection heuristic + logger.warning(f"Assuming this is a valid WK2 CRS string: {repr_truncate(crs)}") + else: + raise ValueError(f"Can not normalize CRS string {repr_truncate(crs)}") + else: + raise ValueError(f"Can not normalize CRS data {type(crs)}") + + return crs diff --git a/machine_learning.html b/machine_learning.html new file mode 100644 index 000000000..c60c4850b --- /dev/null +++ b/machine_learning.html @@ 
-0,0 +1,244 @@ + + + + + + + + Machine Learning — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Machine Learning

+
+

Warning

+

This API and its documentation are experimental, +under heavy development and subject to change.

+
+
+

Added in version 0.10.0.

+
+
+

Random Forest based Classification and Regression

+

openEO defines a couple of processes for random forest based machine learning +for Earth Observation applications:

+
    +
  • fit_class_random_forest for training a random forest based classification model

  • +
  • fit_regr_random_forest for training a random forest based regression model

  • +
  • predict_random_forest for inference/prediction

  • +
+

The openEO Python Client library provides the necessary functionality to set up +and execute training and inference workflows.

+
+

Training

+

Let’s focus on training a classification model, where we try to predict +a class like a land cover type or crop type based on predictors +we derive from EO data. +For example, assume we have a GeoJSON FeatureCollection +of sample points and a corresponding classification target value as follows:

+
feature_collection = {"type": "FeatureCollection", "features": [
+    {
+        "type": "Feature",
+        "properties": {"id": "b3dw-wd23", "target": 3},
+        "geometry": {"type": "Point", "coordinates": [3.4, 51.1]}
+    },
+    {
+        "type": "Feature",
+        "properties": {"id": "r8dh-3jkd", "target": 5},
+        "geometry": {"type": "Point", "coordinates": [3.6, 51.2]}
+    },
+    ...
+
+
+
+

Note

+

Confusingly, the concept “feature” has somewhat conflicting meanings +for different audiences. GIS/EO people use “feature” to refer to the “rows” +in this feature collection. +For the machine learning community however, the properties (the “columns”) +are the features. +To avoid confusion in this discussion we will avoid the term “feature” +and instead use “sample point” for the former and “predictor” for the latter.

+
+

We first build a datacube of “predictor” bands. +For simplicity, we will just use the raw B02/B03/B04 band values here +and use the temporal mean to eliminate the time dimension:

+
cube = connection.load_collection(
+    "SENTINEL2",
+    temporal_extent=[start, end],
+    spatial_extent=bbox,
+    bands=["B02", "B03", "B04"]
+)
+cube = cube.reduce_dimension(dimension="t", reducer="mean")
+
+
+

We now use aggregate_spatial to sample this raster data cube at the sample points +and get a vector cube where we have the temporal mean of the B02/B03/B04 bands as predictor values:

+
predictors = cube.aggregate_spatial(feature_collection, reducer="mean")
+
+
+

We can now train a Random Forest model by calling the +fit_class_random_forest() method on the predictor vector cube +and passing the original target class data:

+
model = predictors.fit_class_random_forest(
+    target=feature_collection,
+)
+# Save the model as a batch job result asset
+# so that we can load it in another job.
+model = model.save_ml_model()
+
+
+

Finally execute this whole training flow as a batch job:

+
training_job = model.create_job()
+training_job.start_and_wait()
+
+
+
+
+

Inference

+

When the batch job finishes successfully, the trained model can then be used +with the predict_random_forest process on the raster data cube +(or another cube with the same band structure) to classify all the pixels.

+

Technically, the openEO predict_random_forest process has to be used as a reducer function +inside a reduce_dimension call, but the openEO Python client library makes it +a bit easier by providing a predict_random_forest() method +directly on the DataCube class, so that you can just do:

+
predicted = cube.predict_random_forest(
+    model=training_job.job_id,
+    dimension="bands"
+)
+
+predicted.download("predicted.GTiff")
+
+
+

We specified the model here by batch job id (string), +but it can also be specified in other ways: +as a BatchJob instance, +as a URL to the corresponding STAC Item that implements the ml-model extension, +or as an MlModel instance (e.g. loaded through +load_ml_model()).

+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/objects.inv b/objects.inv new file mode 100644 index 000000000..623843bc0 Binary files /dev/null and b/objects.inv differ diff --git a/process_mapping.html b/process_mapping.html new file mode 100644 index 000000000..be5fe213e --- /dev/null +++ b/process_mapping.html @@ -0,0 +1,609 @@ + + + + + + + + openEO Process Mapping — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

openEO Process Mapping

+

The table below maps openEO processes to the corresponding +method or function in the openEO Python Client Library.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

openEO process

openEO Python Client Method

absolute

ProcessBuilder.absolute(), absolute()

add

ProcessBuilder.__add__(), ProcessBuilder.__radd__(), ProcessBuilder.add(), add(), DataCube.add(), DataCube.__add__(), DataCube.__radd__()

add_dimension

ProcessBuilder.add_dimension(), add_dimension(), DataCube.add_dimension()

aggregate_spatial

ProcessBuilder.aggregate_spatial(), aggregate_spatial(), DataCube.aggregate_spatial()

aggregate_spatial_window

ProcessBuilder.aggregate_spatial_window(), aggregate_spatial_window(), DataCube.aggregate_spatial_window()

aggregate_temporal

ProcessBuilder.aggregate_temporal(), aggregate_temporal(), DataCube.aggregate_temporal()

aggregate_temporal_period

ProcessBuilder.aggregate_temporal_period(), aggregate_temporal_period(), DataCube.aggregate_temporal_period()

all

ProcessBuilder.all(), all()

and

DataCube.logical_and(), DataCube.__and__()

and_

ProcessBuilder.and_(), and_()

anomaly

ProcessBuilder.anomaly(), anomaly()

any

ProcessBuilder.any(), any()

apply

ProcessBuilder.apply(), apply(), DataCube.apply()

apply_dimension

ProcessBuilder.apply_dimension(), apply_dimension(), DataCube.apply_dimension()

apply_kernel

ProcessBuilder.apply_kernel(), apply_kernel(), DataCube.apply_kernel()

apply_neighborhood

ProcessBuilder.apply_neighborhood(), apply_neighborhood(), DataCube.apply_neighborhood()

arccos

ProcessBuilder.arccos(), arccos()

arcosh

ProcessBuilder.arcosh(), arcosh()

arcsin

ProcessBuilder.arcsin(), arcsin()

arctan

ProcessBuilder.arctan(), arctan()

arctan2

ProcessBuilder.arctan2(), arctan2()

ard_normalized_radar_backscatter

ProcessBuilder.ard_normalized_radar_backscatter(), ard_normalized_radar_backscatter(), DataCube.ard_normalized_radar_backscatter()

ard_surface_reflectance

ProcessBuilder.ard_surface_reflectance(), ard_surface_reflectance(), DataCube.ard_surface_reflectance()

array_append

ProcessBuilder.array_append(), array_append()

array_apply

ProcessBuilder.array_apply(), array_apply()

array_concat

ProcessBuilder.array_concat(), array_concat()

array_contains

ProcessBuilder.array_contains(), array_contains()

array_create

ProcessBuilder.array_create(), array_create()

array_create_labeled

ProcessBuilder.array_create_labeled(), array_create_labeled()

array_element

ProcessBuilder.__getitem__(), ProcessBuilder.array_element(), array_element()

array_filter

ProcessBuilder.array_filter(), array_filter()

array_find

ProcessBuilder.array_find(), array_find()

array_find_label

ProcessBuilder.array_find_label(), array_find_label()

array_interpolate_linear

ProcessBuilder.array_interpolate_linear(), array_interpolate_linear()

array_labels

ProcessBuilder.array_labels(), array_labels()

array_modify

ProcessBuilder.array_modify(), array_modify()

arsinh

ProcessBuilder.arsinh(), arsinh()

artanh

ProcessBuilder.artanh(), artanh()

atmospheric_correction

ProcessBuilder.atmospheric_correction(), atmospheric_correction(), DataCube.atmospheric_correction()

between

ProcessBuilder.between(), between()

ceil

ProcessBuilder.ceil(), ceil()

climatological_normal

ProcessBuilder.climatological_normal(), climatological_normal()

clip

ProcessBuilder.clip(), clip()

cloud_detection

ProcessBuilder.cloud_detection(), cloud_detection()

constant

ProcessBuilder.constant(), constant()

cos

ProcessBuilder.cos(), cos()

cosh

ProcessBuilder.cosh(), cosh()

count

ProcessBuilder.count(), count(), DataCube.count_time()

create_raster_cube

ProcessBuilder.create_raster_cube(), create_raster_cube()

cummax

ProcessBuilder.cummax(), cummax()

cummin

ProcessBuilder.cummin(), cummin()

cumproduct

ProcessBuilder.cumproduct(), cumproduct()

cumsum

ProcessBuilder.cumsum(), cumsum()

date_shift

ProcessBuilder.date_shift(), date_shift()

dimension_labels

ProcessBuilder.dimension_labels(), dimension_labels(), DataCube.dimension_labels()

divide

ProcessBuilder.__truediv__(), ProcessBuilder.__rtruediv__(), ProcessBuilder.divide(), divide(), DataCube.divide(), DataCube.__truediv__(), DataCube.__rtruediv__()

drop_dimension

ProcessBuilder.drop_dimension(), drop_dimension(), DataCube.drop_dimension()

e

ProcessBuilder.e(), e()

eq

ProcessBuilder.__eq__(), ProcessBuilder.eq(), eq(), DataCube.__eq__()

exp

ProcessBuilder.exp(), exp()

extrema

ProcessBuilder.extrema(), extrema()

filter_bands

ProcessBuilder.filter_bands(), filter_bands(), DataCube.filter_bands()

filter_bbox

ProcessBuilder.filter_bbox(), filter_bbox(), DataCube.filter_bbox()

filter_labels

ProcessBuilder.filter_labels(), filter_labels()

filter_spatial

ProcessBuilder.filter_spatial(), filter_spatial(), DataCube.filter_spatial()

filter_temporal

ProcessBuilder.filter_temporal(), filter_temporal(), DataCube.filter_temporal()

first

ProcessBuilder.first(), first()

fit_class_random_forest

ProcessBuilder.fit_class_random_forest(), fit_class_random_forest(), VectorCube.fit_class_random_forest()

fit_curve

ProcessBuilder.fit_curve(), fit_curve(), DataCube.fit_curve()

fit_regr_random_forest

ProcessBuilder.fit_regr_random_forest(), fit_regr_random_forest(), VectorCube.fit_regr_random_forest()

flatten_dimensions

ProcessBuilder.flatten_dimensions(), flatten_dimensions(), DataCube.flatten_dimensions()

floor

ProcessBuilder.floor(), floor()

ge

ProcessBuilder.__ge__(), DataCube.__ge__()

gt

ProcessBuilder.__gt__(), ProcessBuilder.gt(), gt(), DataCube.__gt__()

gte

ProcessBuilder.gte(), gte()

if_

ProcessBuilder.if_(), if_()

inspect

ProcessBuilder.inspect(), inspect()

int

ProcessBuilder.int(), int()

is_infinite

ProcessBuilder.is_infinite(), is_infinite()

is_nan

ProcessBuilder.is_nan(), is_nan()

is_nodata

ProcessBuilder.is_nodata(), is_nodata()

is_valid

ProcessBuilder.is_valid(), is_valid()

last

ProcessBuilder.last(), last()

le

DataCube.__le__()

linear_scale_range

ProcessBuilder.linear_scale_range(), linear_scale_range(), DataCube.linear_scale_range()

ln

ProcessBuilder.ln(), ln(), DataCube.ln()

load_collection

ProcessBuilder.load_collection(), load_collection(), DataCube.load_collection(), Connection.load_collection()

load_geojson

VectorCube.load_geojson(), Connection.load_geojson()

load_ml_model

ProcessBuilder.load_ml_model(), load_ml_model(), MlModel.load_ml_model()

load_result

ProcessBuilder.load_result(), load_result(), Connection.load_result()

load_stac

Connection.load_stac()

load_uploaded_files

ProcessBuilder.load_uploaded_files(), load_uploaded_files()

log

ProcessBuilder.log(), log(), DataCube.logarithm(), DataCube.log2(), DataCube.log10()

lt

ProcessBuilder.__lt__(), ProcessBuilder.lt(), lt(), DataCube.__lt__()

lte

ProcessBuilder.__le__(), ProcessBuilder.lte(), lte()

mask

ProcessBuilder.mask(), mask(), DataCube.mask()

mask_polygon

ProcessBuilder.mask_polygon(), mask_polygon(), DataCube.mask_polygon()

max

ProcessBuilder.max(), max(), DataCube.max_time()

mean

ProcessBuilder.mean(), mean(), DataCube.mean_time()

median

ProcessBuilder.median(), median(), DataCube.median_time()

merge_cubes

ProcessBuilder.merge_cubes(), merge_cubes(), DataCube.merge_cubes()

min

ProcessBuilder.min(), min(), DataCube.min_time()

mod

ProcessBuilder.mod(), mod()

multiply

ProcessBuilder.__mul__(), ProcessBuilder.__rmul__(), ProcessBuilder.__neg__(), ProcessBuilder.multiply(), multiply(), DataCube.multiply(), DataCube.__neg__(), DataCube.__mul__(), DataCube.__rmul__()

nan

ProcessBuilder.nan(), nan()

ndvi

ProcessBuilder.ndvi(), ndvi(), DataCube.ndvi()

neq

ProcessBuilder.__ne__(), ProcessBuilder.neq(), neq(), DataCube.__ne__()

normalized_difference

ProcessBuilder.normalized_difference(), normalized_difference(), DataCube.normalized_difference()

not

DataCube.__invert__()

not_

ProcessBuilder.not_(), not_()

or

DataCube.logical_or(), DataCube.__or__()

or_

ProcessBuilder.or_(), or_()

order

ProcessBuilder.order(), order()

pi

ProcessBuilder.pi(), pi()

power

ProcessBuilder.__pow__(), ProcessBuilder.power(), power(), DataCube.__rpow__(), DataCube.__pow__(), DataCube.power()

predict_curve

ProcessBuilder.predict_curve(), predict_curve(), DataCube.predict_curve()

predict_random_forest

ProcessBuilder.predict_random_forest(), predict_random_forest(), DataCube.predict_random_forest()

product

ProcessBuilder.product(), product()

quantiles

ProcessBuilder.quantiles(), quantiles()

rearrange

ProcessBuilder.rearrange(), rearrange()

reduce_dimension

ProcessBuilder.reduce_dimension(), reduce_dimension(), DataCube.reduce_dimension()

reduce_spatial

ProcessBuilder.reduce_spatial(), reduce_spatial()

rename_dimension

ProcessBuilder.rename_dimension(), rename_dimension(), DataCube.rename_dimension()

rename_labels

ProcessBuilder.rename_labels(), rename_labels(), DataCube.rename_labels()

resample_cube_spatial

ProcessBuilder.resample_cube_spatial(), resample_cube_spatial()

resample_cube_temporal

ProcessBuilder.resample_cube_temporal(), resample_cube_temporal(), DataCube.resample_cube_temporal()

resample_spatial

ProcessBuilder.resample_spatial(), resample_spatial(), DataCube.resample_spatial()

resolution_merge

DataCube.resolution_merge()

round

ProcessBuilder.round(), round()

run_udf

ProcessBuilder.run_udf(), run_udf(), VectorCube.run_udf()

run_udf_externally

ProcessBuilder.run_udf_externally(), run_udf_externally()

sar_backscatter

ProcessBuilder.sar_backscatter(), sar_backscatter(), DataCube.sar_backscatter()

save_ml_model

ProcessBuilder.save_ml_model(), save_ml_model()

save_result

ProcessBuilder.save_result(), save_result(), VectorCube.save_result(), DataCube.save_result()

sd

ProcessBuilder.sd(), sd()

sgn

ProcessBuilder.sgn(), sgn()

sin

ProcessBuilder.sin(), sin()

sinh

ProcessBuilder.sinh(), sinh()

sort

ProcessBuilder.sort(), sort()

sqrt

ProcessBuilder.sqrt(), sqrt()

subtract

ProcessBuilder.__sub__(), ProcessBuilder.__rsub__(), ProcessBuilder.subtract(), subtract(), DataCube.subtract(), DataCube.__sub__(), DataCube.__rsub__()

sum

ProcessBuilder.sum(), sum()

tan

ProcessBuilder.tan(), tan()

tanh

ProcessBuilder.tanh(), tanh()

text_begins

ProcessBuilder.text_begins(), text_begins()

text_concat

ProcessBuilder.text_concat(), text_concat()

text_contains

ProcessBuilder.text_contains(), text_contains()

text_ends

ProcessBuilder.text_ends(), text_ends()

trim_cube

ProcessBuilder.trim_cube(), trim_cube()

unflatten_dimension

ProcessBuilder.unflatten_dimension(), unflatten_dimension(), DataCube.unflatten_dimension()

variance

ProcessBuilder.variance(), variance()

vector_buffer

ProcessBuilder.vector_buffer(), vector_buffer()

vector_to_random_points

ProcessBuilder.vector_to_random_points(), vector_to_random_points()

vector_to_regular_points

ProcessBuilder.vector_to_regular_points(), vector_to_regular_points()

xor

ProcessBuilder.xor(), xor()

+

(Table autogenerated on 2023-08-07)

+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/processes.html b/processes.html new file mode 100644 index 000000000..14bfba9ba --- /dev/null +++ b/processes.html @@ -0,0 +1,539 @@ + + + + + + + + Working with processes — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Working with processes

+

In openEO, a process is an operation that performs a specific task on +a set of parameters and returns a result. +For example, with the add process you can add two numbers, in openEO’s JSON notation:

+
{
+    "process_id": "add",
+    "arguments": {"x": 3, "y": 5}
+}
+
+
+

A process is similar to a function in common programming languages, +and likewise, multiple processes can be combined or chained together +into new, more complex operations.

+
+

A bit of terminology

+

A pre-defined process is a process provided out of the box by a given back-end. +These are often the centrally defined openEO processes, +such as common mathematical (sum, divide, sqrt, …), +statistical (mean, max, …) and +image processing (mask, apply_kernel, …) +operations. +Back-ends are expected to support most of these standard ones, +but are free to pre-define additional ones too.

+

Processes can be combined into a larger pipeline, parameterized +and stored on the back-end as a so-called user-defined process. +This allows you to build a library of reusable building blocks +that can be inserted easily in multiple other places. +See User-Defined Processes (UDP) for more information.

+

How processes are combined into a larger unit +is internally represented by a so-called process graph. +It describes how the inputs and outputs of processes +should be linked together. +A user of the Python client should normally not worry about +the details of a process graph structure, as most of these aspects +are hidden behind regular Python functions, classes and methods.

+
+
+

Using common pre-defined processes

+

The listing of pre-defined processes provided by a back-end +can be inspected with list_processes(). +For example, to get a list of the process names (process ids):

+
>>> process_ids = [process["id"] for process in connection.list_processes()]
+>>> print(process_ids[:16])
+['arccos', 'arcosh', 'power', 'last', 'subtract', 'not', 'cosh', 'artanh',
+'is_valid', 'first', 'median', 'eq', 'absolute', 'arctan2', 'divide','is_nan']
+
+
+

More information about the processes, like a description +or expected parameters, can be queried like that, +but it is often easier to look them up on the +official openEO process documentation.

+

A single pre-defined process can be retrieved with +describe_process().

+
+

Convenience methods

+

Most of the important pre-defined processes are covered directly by methods +on classes like DataCube or +VectorCube.

+
+

See also

+

See openEO Process Mapping for a mapping of openEO processes +to the corresponding methods in the openEO Python Client library.

+
+

For example, to apply the filter_temporal process to a raster data cube:

+
cube = cube.filter_temporal("2020-02-20", "2020-06-06")
+
+
+

Being regular Python methods, you get usual function call features +you’re accustomed to: default values, keyword arguments, kwargs usage, … +For example, to use a bounding box dictionary with kwargs-expansion:

+
bbox = {
+    "west": 5.05, "south": 51.20, "east": 5.10, "north": 51.23
+}
+cube = cube.filter_bbox(**bbox)
+
+
+

Note that some methods try to be more flexible and convenient to use +than how the official process definition prescribes. +For example, the filter_temporal process expects an extent array +with 2 items (the start and end date), +but you can call the corresponding client method in multiple equivalent ways:

+
cube.filter_temporal("2019-07-01", "2019-08-01")
+cube.filter_temporal(["2019-07-01", "2019-08-01"])
+cube.filter_temporal(extent=["2019-07-01", "2019-08-01"])
+cube.filter_temporal(start_date="2019-07-01", end_date="2019-08-01")
+
+
+
+
+

Advanced argument tweaking

+
+

Added in version 0.10.1.

+
+

In some situations, you may want to fine-tune what the (convenience) methods generate. +For example, you want to play with non-standard, experimental arguments, +or there is a problem with an automatic argument handling/conversion feature.

+

You can tweak the arguments of your current result node as follows. +Say, we want to add some non-standard feature_flags argument to the load_collection process node. +We first get the current result node with result_node() and use update_arguments() to add an additional argument to it:

+
# `Connection.load_collection` does not support `feature_flags` argument
+cube = connection.load_collection(...)
+
+# Add `feature_flags` argument to `load_collection` process graph node
+cube.result_node().update_arguments(feature_flags="rXPk")
+
+# The resulting process graph will now contain this non-standard argument:
+#     {
+#         "process_id": "load_collection",
+#         "arguments": {
+#             ...
+#             "feature_flags": "rXPk",
+
+
+
+
+
+

Generic API for adding processes

+

An openEO back-end may offer processes that are not part of the core API, +or the client may not (yet) have a corresponding method +for a process that you wish to use. +In that case, you can fall back to a more generic API +that allows you to add processes directly.

+
+

Basics

+

To add a simple process to the graph, use +the process() method +on a DataCube. +You have to specify the process id and arguments +(as a single dictionary or through keyword arguments **kwargs). +It will return a new DataCube with the new process appended +to the internal process graph.

+

A very simple example using the mean process and a +literal list in an arguments dictionary:

+
arguments= {
+    "data": [1, 3, -1]
+}
+res = cube.process("mean", arguments)
+
+
+

or equivalently, leveraging keyword arguments:

+
res = cube.process("mean", data=[1, 3, -1])
+
+
+
+
+

Passing data cube arguments

+

The example above is a bit convoluted, however, in the sense that +you start from a given data cube cube, you add a mean process +that works on a given data array, while completely ignoring the original cube. +In reality you typically want to apply the process on the cube. +This is possible by passing a data cube object directly as argument, +for example with the ndvi process that at least expects +a data cube as data argument:

+
res = cube.process("ndvi", data=cube)
+
+
+

Note that you have to specify cube twice here: +a first time to call the method and a second time as argument. +Moreover, it requires you to define a Python variable for the data +cube, which is annoying if you want to use chained expressions. +To solve these issues, you can use the THIS +constant as symbolic reference to the “current” cube:

+
from openeo.rest.datacube import THIS
+
+res = (
+    cube
+        .process("filter_bands", data=THIS)
+        .process("mask", data=THIS, mask=mask)
+        .process("ndvi", data=THIS)
+)
+
+
+
+
+

Passing results from other process calls as arguments

+

Another use case of generically applying (custom) processes is +passing a process result as argument to another process working on a cube. +For example, assume we have a custom process load_my_vector_cube +to load a vector cube from an online resource. +We can use this vector cube as geometry for +DataCube.aggregate_spatial() +using openeo.processes.process() as follows:

+
from openeo.processes import process
+
+res = cube.aggregate_spatial(
+    geometries=process("load_my_vector_cube", url="https://geo.example/features.db"),
+    reducer="mean"
+)
+
+
+
+
+
+

Processes with child “callbacks”

+

Some openEO processes expect some kind of sub-process +to be invoked on a subset or slice of the datacube. +For example:

+
    +
  • process apply requires a transformation that will be applied +to each pixel in the cube (separately), e.g. in pseudocode

    +
    cube.apply(
    +    given a pixel value
    +    => scale it with factor 0.01
    +)
    +
    +
    +
  • +
  • process reduce_dimension requires an aggregation function to convert +an array of pixel values (along a given dimension) to a single value, +e.g. in pseudocode

    +
    cube.reduce_dimension(
    +    given a pixel timeseries (array) for a (x,y)-location
    +    => temporal mean of that array
    +)
    +
    +
    +
  • +
  • process aggregate_spatial requires a function to aggregate the values +in one or more geometries

  • +
+

These transformation functions are usually called “callbacks” +because instead of being called explicitly by the user, +they are called and managed by their “parent” process +(the apply, reduce_dimension and aggregate_spatial in the examples).

+

The openEO Python Client Library currently provides a couple of DataCube methods +that expect such a callback, most commonly:

+ +

The openEO Python Client Library supports several ways +to specify the desired callback for these functions:

+ +
+

Callback as string

+

The easiest way is passing a process name as a string, +for example:

+
# Take the absolute value of each pixel
+cube.apply("absolute")
+
+# Reduce a cube along the temporal dimension by taking the maximum value
+cube.reduce_dimension(reducer="max", dimension="t")
+
+
+

This approach is only possible if the desired transformation is available +as a single process. If not, use one of the methods below.

+

It’s also important to note that the “signature” of the provided callback process +should correspond properly with what the parent process expects. +For example: apply requires a callback process that receives a +number and returns one (like absolute or sqrt), +while reduce_dimension requires a callback process that receives +an array of numbers and returns a single number (like max or mean).

+
+
+

Callback as a callable

+

You can also specify the callback as a “callable”, +which is a fancy word for a Python object that can be called, +but just think of it like a function you can call.

+

You can use a regular Python function, like this:

+
def transform(x):
+    return x * 2 + 3
+
+cube.apply(transform)
+
+
+

or, more compactly, a “lambda” +(a construct in Python to create anonymous inline functions):

+
cube.apply(lambda x: x * 2 + 3)
+
+
+

The openEO Python Client Library implements most of the official openEO processes as +functions in the “openeo.processes” module, +which can be used directly as callback:

+
from openeo.processes import absolute, max
+
+cube.apply(absolute)
+cube.reduce_dimension(reducer=max, dimension="t")
+
+
+

The argument that will be passed to all these callback functions is +a ProcessBuilder instance. +This is a helper object with predefined methods for all standard openEO processes, +allowing you to use an object-oriented coding style to define the callback. +For example:

+
from openeo.processes import ProcessBuilder
+
+def avg(data: ProcessBuilder):
+    return data.mean()
+
+cube.reduce_dimension(reducer=avg, dimension="t")
+
+
+

These methods also return ProcessBuilder objects, +which also allows writing callbacks in chained fashion:

+
cube.apply(
+    lambda x: x.absolute().cos().add(y=1.23)
+)
+
+
+

All this gives a lot of flexibility to define callbacks compactly +in a desired coding style. +The following examples result in the same callback:

+
from openeo.processes import ProcessBuilder, mean, cos, add
+
+# Chained methods
+cube.reduce_dimension(
+    lambda data: data.mean().cos().add(y=1.23),
+    dimension="t"
+)
+
+# Functions
+cube.reduce_dimension(
+    lambda data: add(x=cos(mean(data)), y=1.23),
+    dimension="t"
+)
+
+# Mixing methods, functions and operators
+cube.reduce_dimension(
+    lambda data: cos(data.mean()) + 1.23,
+    dimension="t"
+)
+
+
+
+

Caveats

+

Specifying callbacks through Python functions (or lambdas) +looks intuitive and straightforward, but it should be noted +that not everything is allowed in these functions. +You should just limit yourself to calling +openeo.processes functions, +ProcessBuilder methods +and basic math operators. +Don’t call functions from other libraries like numpy or scipy. +Don’t use Python control flow statements like if/else constructs +or for loops.

+

The reason for this is that the openEO Python Client Library +does not translate the function source code itself +to an openEO process graph. +Instead, when building the openEO process graph, +it passes a special object to the function +and keeps track of which openeo.processes functions +were called to assemble the corresponding process graph. +If you use control flow statements or use numpy functions for example, +this procedure will incorrectly detect what you want to do in the callback.

+

For example, if you mistakenly use the Python builtin sum() function +in a callback instead of openeo.processes.sum(), you will run into trouble. +Luckily, the openEO Python Client Library should raise an error if it detects that:

+
>>> # Wrongly using builtin `sum` function
+>>> cube.reduce_dimension(dimension="t", reducer=sum)
+RuntimeError: Exceeded ProcessBuilder iteration limit.
+Are you mistakenly using a builtin like `sum()` or `all()` in a callback
+instead of the appropriate helpers from `openeo.processes`?
+
+>>> # Explicit usage of `openeo.processes.sum`
+>>> import openeo.processes
+>>> cube.reduce_dimension(dimension="t", reducer=openeo.processes.sum)
+<openeo.rest.datacube.DataCube at 0x7f6505a40d00>
+
+
+
+
+
+

Callback as PGNode

+

You can also pass a PGNode object as callback.

+
+

Attention

+

This approach should generally not be used in normal use cases. +The other options discussed above should be preferred. +It’s mainly intended for internal use and an occasional, advanced use case. +It requires in-depth knowledge of the openEO API +and openEO Python Client Library to construct correctly.

+
+

Some examples:

+
from openeo.internal.graph_building import PGNode
+
+cube.apply(PGNode(
+    "add",
+    x=PGNode(
+        "cos",
+        x=PGNode("absolute", x={"from_parameter": "x"})
+    ),
+    y=1.23
+))
+
+cube.reduce_dimension(
+    reducer=PGNode("max", data={"from_parameter": "data"}),
+    dimension="bands"
+)
+
+
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/py-modindex.html b/py-modindex.html new file mode 100644 index 000000000..fba248299 --- /dev/null +++ b/py-modindex.html @@ -0,0 +1,272 @@ + + + + + + + Python Module Index — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ + +

Python Module Index

+ +
+ o +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 
+ o
+ openeo +
    + openeo.api.logs +
    + openeo.api.process +
    + openeo.extra.spectral_indices +
    + openeo.internal.graph_building +
    + openeo.metadata +
    + openeo.processes +
    + openeo.rest._datacube +
    + openeo.rest.connection +
    + openeo.rest.conversions +
    + openeo.rest.datacube +
    + openeo.rest.graph_building +
    + openeo.rest.job +
    + openeo.rest.mlmodel +
    + openeo.rest.multiresult +
    + openeo.rest.udp +
    + openeo.rest.userfile +
    + openeo.rest.vectorcube +
    + openeo.testing +
    + openeo.testing.results +
    + openeo.udf.debug +
    + openeo.udf.run_code +
    + openeo.udf.structured_data +
    + openeo.udf.udf_data +
    + openeo.udf.udf_signatures +
    + openeo.udf.xarraydatacube +
    + openeo.util +
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/search.html b/search.html new file mode 100644 index 000000000..cd3902e35 --- /dev/null +++ b/search.html @@ -0,0 +1,144 @@ + + + + + + + Search — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Search

+ + + + +

+ Searching for multiple words only shows matches that contain + all words. +

+ + +
+ + + +
+ + +
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/searchindex.js b/searchindex.js new file mode 100644 index 000000000..c0aa641b8 --- /dev/null +++ b/searchindex.js @@ -0,0 +1 @@ +Search.setIndex({"alltitles": {"A bit of terminology": [[24, "a-bit-of-terminology"]], "A first example: apply with an UDF to rescale pixel values": [[25, "a-first-example-apply-with-an-udf-to-rescale-pixel-values"]], "API": [[11, "api"], [14, "module-openeo.extra.spectral_indices"]], "API (General)": [[0, null]], "API: openeo.processes": [[2, null]], "Ad-hoc dependency handling": [[25, "ad-hoc-dependency-handling"]], "Added": [[7, "added"], [7, "id2"], [7, "id6"], [7, "id12"], [7, "id15"], [7, "id19"], [7, "id21"], [7, "id24"], [7, "id27"], [7, "id31"], [7, "id34"], [7, "id41"], [7, "id44"], [7, "id49"], [7, "id55"], [7, "id59"], [7, "id62"], [7, "id65"], [7, "id67"], [7, "id69"], [7, "id74"], [7, "id79"], [7, "id82"], [7, "id89"], [7, "id93"], [7, "id96"], [7, "id98"], [7, "id103"], [7, "id105"], [7, "id109"], [7, "id114"], [7, "id118"], [7, "id121"], [7, "id127"], [7, "id130"], [7, "id134"], [7, "id138"], [7, "id141"], [7, "id144"], [7, "id149"], [7, "id154"], [7, "id157"]], "Advanced argument tweaking": [[24, "advanced-argument-tweaking"]], "Aggregated EVI timeseries": [[4, "aggregated-evi-timeseries"]], "Alternative development installation": [[19, "alternative-development-installation"]], "Analysis Ready Data generation": [[9, null]], "Applicability and Constraints": [[25, "applicability-and-constraints"]], "Applying a cloud mask": [[4, "applying-a-cloud-mask"]], "Atmospheric correction": [[9, "atmospheric-correction"]], "Auth config files and openeo-auth helper tool": [[3, "auth-config-files-and-openeo-auth-helper-tool"]], "Authentication": [[4, "authentication"]], "Authentication and Account Management": [[3, null]], "Authentication for long-running applications and non-interactive contexts": [[3, "authentication-for-long-running-applications-and-non-interactive-contexts"]], 
"Automatic band mapping": [[14, "automatic-band-mapping"]], "Automatic batch job log printing": [[5, "automatic-batch-job-log-printing"]], "Background": [[12, "background"]], "Background and inspiration": [[6, "background-and-inspiration"]], "Band mapping": [[14, "band-mapping"]], "Band math": [[4, "band-math"]], "Basic HTTP Auth": [[3, "basic-http-auth"]], "Basic HTTP Auth config": [[3, "basic-http-auth-config"]], "Basic ProcessBasedJobCreator example": [[11, "basic-processbasedjobcreator-example"]], "Basic install": [[21, "basic-install"]], "Basics": [[24, "basics"]], "Batch Jobs": [[5, null]], "Batch Jobs (asynchronous execution)": [[4, "batch-jobs-asynchronous-execution"]], "Batch job logs": [[5, "batch-job-logs"]], "Batch job object": [[5, "batch-job-object"]], "Best Practices and Troubleshooting Tips": [[3, "best-practices-and-troubleshooting-tips"]], "Best practices, coding style and general tips": [[6, null]], "Building and storing user-defined process": [[26, "building-and-storing-user-defined-process"]], "Building process graphs with multiple result nodes": [[18, "building-process-graphs-with-multiple-result-nodes"]], "Building the documentation": [[19, "building-the-documentation"]], "Callback as PGNode": [[24, "callback-as-pgnode"]], "Callback as a callable": [[24, "callback-as-a-callable"]], "Callback as string": [[24, "callback-as-string"]], "Caveats": [[24, "caveats"]], "Changed": [[7, "changed"], [7, "id3"], [7, "id16"], [7, "id25"], [7, "id28"], [7, "id32"], [7, "id38"], [7, "id42"], [7, "id45"], [7, "id50"], [7, "id56"], [7, "id60"], [7, "id70"], [7, "id75"], [7, "id80"], [7, "id83"], [7, "id85"], [7, "id90"], [7, "id94"], [7, "id99"], [7, "id106"], [7, "id110"], [7, "id115"], [7, "id119"], [7, "id122"], [7, "id125"], [7, "id131"], [7, "id135"], [7, "id142"], [7, "id145"], [7, "id150"]], "Changelog": [[7, null]], "Clear the refresh token file": [[3, "clear-the-refresh-token-file"]], "Client-side (local) processing": [[12, null]], "Code reuse with 
user-defined processes": [[26, "code-reuse-with-user-defined-processes"]], "Collection discovery": [[4, "collection-discovery"]], "Computing multiple statistics": [[4, "computing-multiple-statistics"]], "Configuration": [[8, null]], "Configuration files": [[8, "configuration-files"]], "Configuration options": [[8, "configuration-options"]], "Connect to an openEO back-end": [[4, "connect-to-an-openeo-back-end"]], "Construct DataCube from process": [[18, "construct-datacube-from-process"]], "Construct a DataCube from JSON": [[18, "construct-a-datacube-from-json"]], "Contents:": [[10, null]], "Contributing code": [[19, "contributing-code"]], "Convenience methods": [[24, "convenience-methods"]], "Create a batch job": [[5, "create-a-batch-job"]], "Create, start and wait in one go": [[5, "create-start-and-wait-in-one-go"]], "Creating a release": [[19, "creating-a-release"]], "Data discovery": [[17, "data-discovery"]], "DataCube construction": [[18, null]], "Dataset sampling": [[13, null]], "Declaration of UDF dependencies": [[25, "declaration-of-udf-dependencies"]], "Declaring Parameters": [[26, "declaring-parameters"]], "Default openEO back-end URL and auto-authentication": [[3, "default-openeo-back-end-url-and-auto-authentication"]], "Deprecated": [[7, "deprecated"], [7, "id151"]], "Development Installation on Windows": [[19, "development-installation-on-windows"]], "Development and maintenance": [[19, null]], "Directly load batch job results": [[5, "directly-load-batch-job-results"]], "Download (synchronously)": [[4, "download-synchronously"]], "Download all assets": [[5, "download-all-assets"]], "Download batch job results": [[5, "download-batch-job-results"]], "Download single asset": [[5, "download-single-asset"]], "Downloading a datacube and executing an UDF locally": [[25, "downloading-a-datacube-and-executing-an-udf-locally"]], "EODC back-end": [[9, "eodc-back-end"], [9, "id5"]], "Enabling additional features": [[21, "enabling-additional-features"]], "Evaluate 
user-defined processes": [[26, "evaluate-user-defined-processes"]], "Example": [[25, "example"]], "Example use case: EVI map and timeseries": [[4, "example-use-case-evi-map-and-timeseries"]], "Example: Smoothing timeseries with a user defined function (UDF)": [[25, "example-smoothing-timeseries-with-a-user-defined-function-udf"]], "Example: apply_dimension with a UDF": [[25, "example-apply-dimension-with-a-udf"]], "Example: apply_neighborhood with a UDF": [[25, "example-apply-neighborhood-with-a-udf"]], "Example: reduce_dimension with a UDF": [[25, "example-reduce-dimension-with-a-udf"]], "Examples": [[25, "examples"]], "Execute a process graph directly from raw JSON": [[15, "execute-a-process-graph-directly-from-raw-json"]], "Export a process graph": [[15, "export-a-process-graph"]], "Filter on collection properties": [[17, "filter-on-collection-properties"]], "Filter on spatial extent": [[17, "filter-on-spatial-extent"]], "Filter on temporal extent": [[17, "filter-on-temporal-extent"]], "Finding and loading data": [[17, null]], "Fine-grained asset downloads": [[5, "fine-grained-asset-downloads"]], "Fixed": [[7, "fixed"], [7, "id4"], [7, "id7"], [7, "id10"], [7, "id13"], [7, "id17"], [7, "id22"], [7, "id29"], [7, "id33"], [7, "id36"], [7, "id39"], [7, "id47"], [7, "id51"], [7, "id53"], [7, "id57"], [7, "id63"], [7, "id66"], [7, "id68"], [7, "id72"], [7, "id77"], [7, "id81"], [7, "id87"], [7, "id95"], [7, "id101"], [7, "id107"], [7, "id123"], [7, "id132"], [7, "id139"], [7, "id147"], [7, "id155"], [7, "id158"]], "Format": [[8, "format"]], "From a parameterized data cube": [[26, "from-a-parameterized-data-cube"]], "Functions in openeo.processes": [[2, "functions-in-openeo-processes"]], "General code style recommendations": [[6, "general-code-style-recommendations"]], "General options": [[3, "general-options"]], "Generic API for adding processes": [[24, "generic-api-for-adding-processes"]], "Geotrellis back-end": [[9, "geotrellis-back-end"], [9, "id6"]], "Getting 
Started": [[4, null]], "Graph building": [[0, "graph-building"]], "Guidelines and tips": [[3, "guidelines-and-tips"]], "Handling large vector data sets": [[17, "handling-large-vector-data-sets"]], "High level Interface": [[0, "high-level-interface"]], "Important files": [[19, "important-files"]], "Indices and tables": [[20, "indices-and-tables"]], "Inference": [[22, "inference"]], "Initial exploration of an openEO collection": [[17, "initial-exploration-of-an-openeo-collection"]], "Installation": [[12, "installation"], [21, null]], "Installation with Conda": [[21, "installation-with-conda"]], "Installation with pip": [[21, "installation-with-pip"]], "Internal openEO process graph building utilities": [[0, "internal-openeo-process-graph-building-utilities"]], "Job creation based on parameterized processes": [[11, "job-creation-based-on-parameterized-processes"]], "Jupyter integration": [[5, "jupyter-integration"]], "Jupyter(lab) tips and tricks": [[6, "jupyter-lab-tips-and-tricks"]], "Left-closed intervals: start included, end excluded": [[17, "left-closed-intervals-start-included-end-excluded"]], "Legacy read_vector usage": [[15, "legacy-read-vector-usage"]], "Like a Pro": [[19, "like-a-pro"]], "Line (length) management": [[6, "line-length-management"]], "List your batch jobs": [[5, "list-your-batch-jobs"]], "Loading a data cube from a collection": [[17, "loading-a-data-cube-from-a-collection"]], "Loading a published user-defined process as DataCube": [[16, "loading-a-published-user-defined-process-as-datacube"]], "Loading an initial data cube": [[4, "loading-an-initial-data-cube"]], "Local Collections": [[12, "local-collections"]], "Local Processing": [[12, "local-processing"]], "Location": [[8, "location"]], "Logging from a UDF": [[25, "logging-from-a-udf"]], "Machine Learning": [[22, null]], "Manual band mapping": [[14, "manual-band-mapping"]], "Miscellaneous tips and tricks": [[15, null]], "Module openeo.udf.udf_signatures": [[25, 
"module-openeo.udf.udf_signatures"]], "More advanced parameter schemas": [[26, "more-advanced-parameter-schemas"]], "Multi Backend Job Manager": [[11, null]], "OIDC Authentication: Client Credentials Flow": [[3, "oidc-authentication-client-credentials-flow"]], "OIDC Authentication: Device Code Flow": [[3, "oidc-authentication-device-code-flow"]], "OIDC Authentication: Dynamic Method Selection": [[3, "oidc-authentication-dynamic-method-selection"]], "OIDC Authentication: Refresh Token Flow": [[3, "oidc-authentication-refresh-token-flow"]], "OIDC Client Credentials Using Environment Variables": [[3, "oidc-client-credentials-using-environment-variables"]], "OpenID Connect Based Authentication": [[3, "openid-connect-based-authentication"]], "OpenID Connect configs": [[3, "openid-connect-configs"]], "OpenID Connect refresh tokens": [[3, "openid-connect-refresh-tokens"]], "Optional dependencies": [[21, "optional-dependencies"]], "Parameterization": [[18, "parameterization"]], "Passing data cube arguments": [[24, "passing-data-cube-arguments"]], "Passing results from other process calls as arguments": [[24, "passing-results-from-other-process-calls-as-arguments"]], "Performance & scalability": [[13, "performance-scalability"]], "Pre-commit for basic code quality checks": [[19, "pre-commit-for-basic-code-quality-checks"]], "Pre-commit set up": [[19, "pre-commit-set-up"]], "Pre-commit usage": [[19, "pre-commit-usage"]], "Prerequisites": [[19, "prerequisites"]], "Procedure": [[19, "procedure"]], "Process Parameters": [[26, "process-parameters"]], "ProcessBasedJobCreator with geometry handling": [[11, "processbasedjobcreator-with-geometry-handling"]], "ProcessBuilder helper class": [[2, "processbuilder-helper-class"]], "Processes with child \u201ccallbacks\u201d": [[24, "processes-with-child-callbacks"]], "Profile a process server-side": [[25, "profile-a-process-server-side"]], "Public openEO process graph building utilities": [[0, 
"public-openeo-process-graph-building-utilities"]], "Publicly publishing a user-defined process.": [[16, "publicly-publishing-a-user-defined-process"]], "Pull requests": [[19, "pull-requests"]], "Quick and easy": [[19, "quick-and-easy"]], "Random Forest based Classification and Regression": [[22, "random-forest-based-classification-and-regression"]], "Re-parameterization": [[18, "re-parameterization"]], "Reconnecting to a batch job": [[5, "reconnecting-to-a-batch-job"]], "Reference implementations": [[9, "reference-implementations"], [9, "id4"]], "Removed": [[7, "removed"], [7, "id9"], [7, "id35"], [7, "id46"], [7, "id71"], [7, "id86"], [7, "id91"], [7, "id97"], [7, "id100"], [7, "id111"], [7, "id116"], [7, "id128"], [7, "id136"], [7, "id146"], [7, "id152"]], "Rounding down periods to dates": [[17, "rounding-down-periods-to-dates"]], "Run a batch job": [[5, "run-a-batch-job"]], "Running the unit tests": [[19, "running-the-unit-tests"]], "SAR backscatter": [[9, "sar-backscatter"]], "STAC Collections and Items": [[12, "stac-collections-and-items"]], "Sampling at scale": [[13, "sampling-at-scale"]], "Sections:": [[2, "sections"]], "Sharing of user-defined processes": [[16, null]], "Single string temporal extents": [[17, "single-string-temporal-extents"]], "Some examples": [[18, "some-examples"]], "Source or development install": [[21, "source-or-development-install"]], "Spectral Indices": [[14, null]], "Standard for declaring Python UDF dependencies": [[25, "standard-for-declaring-python-udf-dependencies"]], "Store to a file": [[26, "store-to-a-file"]], "Table of contents": [[20, "table-of-contents"]], "Testing": [[0, "testing"]], "The load_collection process": [[18, "the-load-collection-process"]], "Through \u201cprocess functions\u201d": [[26, "through-process-functions"]], "Training": [[22, "training"]], "UDF dependency management": [[25, "udf-dependency-management"]], "UDF function names and signatures": [[25, "udf-function-names-and-signatures"]], "UDF script": 
[[25, "udf-script"]], "UDFs as apply/reduce \u201ccallbacks\u201d": [[25, "udfs-as-apply-reduce-callbacks"]], "UDF\u2019s that transform cube metadata": [[25, "udf-s-that-transform-cube-metadata"]], "UDP Example: EVI timeseries": [[26, "udp-example-evi-timeseries"]], "Update of generated files": [[19, "update-of-generated-files"]], "Usage": [[12, "usage"], [25, "usage"]], "Usage example": [[20, "usage-example"]], "User-Defined Functions (UDF) explained": [[25, null]], "User-Defined Processes (UDP)": [[26, null]], "Using a predefined dictionary": [[26, "using-a-predefined-dictionary"]], "Using a public UDP through URL based \u201cnamespace\u201d": [[16, "using-a-public-udp-through-url-based-namespace"]], "Using common pre-defined processes": [[24, "using-common-pre-defined-processes"]], "Verification": [[19, "verification"], [25, "verification"]], "Verifying and troubleshooting": [[21, "verifying-and-troubleshooting"]], "Viewing profiling information": [[25, "viewing-profiling-information"]], "Wait for a batch job to finish": [[5, "wait-for-a-batch-job-to-finish"]], "Workflow script": [[25, "workflow-script"]], "Working with processes": [[24, null]], "Year/month shorthand notation": [[17, "year-month-shorthand-notation"]], "[0.10.0] - 2022-04-08 - \u201cSRR3\u201d release": [[7, "srr3-release"]], "[0.10.1] - 2022-05-18 - \u201cLPS22\u201d release": [[7, "lps22-release"]], "[0.11.0] - 2022-07-02": [[7, "id92"]], "[0.12.0] - 2022-09-09": [[7, "id88"]], "[0.12.1] - 2022-09-15": [[7, "id84"]], "[0.13.0] - 2022-10-10 - \u201cUDF UX\u201d release": [[7, "udf-ux-release"]], "[0.14.0] - 2023-02-01": [[7, "id78"]], "[0.14.1] - 2023-02-06": [[7, "id76"]], "[0.15.0] - 2023-03-03": [[7, "id73"]], "[0.16.0] - 2023-04-17 - \u201cSRR5\u201d release": [[7, "srr5-release"]], "[0.17.0] and [0.17.1] - 2023-05-16": [[7, "and-0-17-1-2023-05-16"]], "[0.18.0] - 2023-05-31": [[7, "id64"]], "[0.19.0] - 2023-06-16": [[7, "id61"]], "[0.20.0] - 2023-06-30": [[7, "id58"]], "[0.21.0] - 
2023-07-19": [[7, "id54"]], "[0.21.1] - 2023-07-19": [[7, "id52"]], "[0.22.0] - 2023-08-09": [[7, "id48"]], "[0.23.0] - 2023-10-02": [[7, "id43"]], "[0.24.0] - 2023-10-27": [[7, "id40"]], "[0.25.0] - 2023-11-02": [[7, "id37"]], "[0.26.0] - 2023-11-27 - \u201cSRR6\u201d release": [[7, "srr6-release"]], "[0.27.0] - 2024-01-12": [[7, "id30"]], "[0.28.0] - 2024-03-18": [[7, "id26"]], "[0.29.0] - 2024-05-03": [[7, "id23"]], "[0.30.0] - 2024-06-18": [[7, "id20"]], "[0.31.0] - 2024-07-26": [[7, "id18"]], "[0.32.0] - 2024-09-27": [[7, "id14"]], "[0.33.0] - 2024-10-18": [[7, "id11"]], "[0.34.0] - 2024-10-31": [[7, "id8"]], "[0.35.0] - 2024-11-19": [[7, "id5"]], "[0.36.0] - 2024-12-10": [[7, "id1"]], "[0.4.10] - 2021-02-26": [[7, "id133"]], "[0.4.4] - 2020-08-20": [[7, "id156"]], "[0.4.5] - 2020-10-01": [[7, "id153"]], "[0.4.6] - 2020-10-15": [[7, "id148"]], "[0.4.7] - 2020-10-22": [[7, "id143"]], "[0.4.8] - 2020-11-17": [[7, "id140"]], "[0.4.9] - 2021-01-29": [[7, "id137"]], "[0.5.0] - 2021-03-17": [[7, "id129"]], "[0.6.0] - 2021-03-26": [[7, "id126"]], "[0.6.1] - 2021-03-29": [[7, "id124"]], "[0.7.0] - 2021-04-21": [[7, "id120"]], "[0.8.0] - 2021-06-25": [[7, "id117"]], "[0.8.1] - 2021-08-24": [[7, "id113"]], "[0.8.2] - 2021-08-24": [[7, "id112"]], "[0.9.0] - 2021-10-11": [[7, "id108"]], "[0.9.1] - 2021-11-16": [[7, "id104"]], "[0.9.2] - 2022-01-14": [[7, "id102"]], "[Unreleased]": [[7, "unreleased"]], "openEO CookBook": [[10, null]], "openEO Process Mapping": [[23, null]], "openEO Python Client": [[20, null]], "openeo": [[0, "openeo"]], "openeo.UDF API and usage changes in version 0.13.0": [[25, "openeo-udf-api-and-usage-changes-in-version-0-13-0"]], "openeo.api.logs": [[0, "module-openeo.api.logs"]], "openeo.api.process": [[0, "module-openeo.api.process"]], "openeo.metadata": [[0, "module-openeo.metadata"]], "openeo.processes": [[0, "openeo-processes"]], "openeo.rest.connection": [[0, "module-openeo.rest.connection"]], "openeo.rest.conversions": [[0, 
"module-openeo.rest.conversions"]], "openeo.rest.datacube": [[0, "module-openeo.rest.datacube"]], "openeo.rest.job": [[0, "module-openeo.rest.job"]], "openeo.rest.mlmodel": [[0, "module-openeo.rest.mlmodel"]], "openeo.rest.multiresult": [[0, "module-openeo.rest.multiresult"]], "openeo.rest.udp": [[0, "module-openeo.rest.udp"]], "openeo.rest.userfile": [[0, "module-openeo.rest.userfile"]], "openeo.rest.vectorcube": [[0, "module-openeo.rest.vectorcube"]], "openeo.testing": [[0, "module-openeo.testing"]], "openeo.testing.results": [[0, "module-openeo.testing.results"]], "openeo.udf": [[0, "module-openeo.udf.udf_data"]], "openeo.util": [[0, "module-openeo.util"]], "to do": [[11, null]]}, "docnames": ["api", "api-processbuilder", "api-processes", "auth", "basics", "batch_jobs", "best_practices", "changelog", "configuration", "cookbook/ard", "cookbook/index", "cookbook/job_manager", "cookbook/localprocessing", "cookbook/sampling", "cookbook/spectral_indices", "cookbook/tricks", "cookbook/udp_sharing", "data_access", "datacube_construction", "development", "index", "installation", "machine_learning", "process_mapping", "processes", "udf", "udp"], "envversion": {"sphinx": 64, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1}, "filenames": ["api.rst", "api-processbuilder.rst", "api-processes.rst", "auth.rst", "basics.rst", "batch_jobs.rst", "best_practices.rst", "changelog.md", "configuration.rst", "cookbook/ard.rst", "cookbook/index.rst", "cookbook/job_manager.rst", "cookbook/localprocessing.rst", "cookbook/sampling.md", "cookbook/spectral_indices.rst", "cookbook/tricks.rst", "cookbook/udp_sharing.rst", "data_access.rst", "datacube_construction.rst", "development.rst", "index.rst", "installation.rst", "machine_learning.rst", 
"process_mapping.rst", "processes.rst", "udf.rst", "udp.rst"], "indexentries": {"__call__() (openeo.extra.job_management.processbasedjobcreator method)": [[11, "openeo.extra.job_management.ProcessBasedJobCreator.__call__", false]], "__init__() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.__init__", false]], "__init__() (openeo.rest.multiresult.multiresult method)": [[0, "openeo.rest.multiresult.MultiResult.__init__", false]], "absolute() (in module openeo.processes)": [[2, "openeo.processes.absolute", false]], "add() (in module openeo.processes)": [[2, "openeo.processes.add", false]], "add() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.add", false]], "add_backend() (openeo.extra.job_management.multibackendjobmanager method)": [[11, "openeo.extra.job_management.MultiBackendJobManager.add_backend", false]], "add_dimension() (in module openeo.processes)": [[2, "openeo.processes.add_dimension", false]], "add_dimension() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.add_dimension", false]], "aggregate_spatial() (in module openeo.processes)": [[2, "openeo.processes.aggregate_spatial", false]], "aggregate_spatial() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.aggregate_spatial", false]], "aggregate_spatial_window() (in module openeo.processes)": [[2, "openeo.processes.aggregate_spatial_window", false]], "aggregate_spatial_window() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.aggregate_spatial_window", false]], "aggregate_temporal() (in module openeo.processes)": [[2, "openeo.processes.aggregate_temporal", false]], "aggregate_temporal() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.aggregate_temporal", false]], "aggregate_temporal_period() (in module openeo.processes)": [[2, "openeo.processes.aggregate_temporal_period", false]], "aggregate_temporal_period() 
(openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.aggregate_temporal_period", false]], "all() (in module openeo.processes)": [[2, "openeo.processes.all", false]], "and_() (in module openeo.processes)": [[2, "openeo.processes.and_", false]], "anomaly() (in module openeo.processes)": [[2, "openeo.processes.anomaly", false]], "any() (in module openeo.processes)": [[2, "openeo.processes.any", false]], "append_and_rescale_indices() (in module openeo.extra.spectral_indices)": [[14, "openeo.extra.spectral_indices.append_and_rescale_indices", false]], "append_band() (openeo.metadata.banddimension method)": [[0, "openeo.metadata.BandDimension.append_band", false]], "append_index() (in module openeo.extra.spectral_indices)": [[14, "openeo.extra.spectral_indices.append_index", false]], "append_indices() (in module openeo.extra.spectral_indices)": [[14, "openeo.extra.spectral_indices.append_indices", false]], "apply() (in module openeo.processes)": [[2, "openeo.processes.apply", false]], "apply() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.apply", false]], "apply_datacube() (in module openeo.udf.udf_signatures)": [[25, "openeo.udf.udf_signatures.apply_datacube", false]], "apply_dimension() (in module openeo.processes)": [[2, "openeo.processes.apply_dimension", false]], "apply_dimension() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.apply_dimension", false]], "apply_dimension() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.apply_dimension", false]], "apply_kernel() (in module openeo.processes)": [[2, "openeo.processes.apply_kernel", false]], "apply_kernel() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.apply_kernel", false]], "apply_metadata() (in module openeo.udf.udf_signatures)": [[25, "openeo.udf.udf_signatures.apply_metadata", false]], "apply_neighborhood() (in module openeo.processes)": [[2, 
"openeo.processes.apply_neighborhood", false]], "apply_neighborhood() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.apply_neighborhood", false]], "apply_polygon() (in module openeo.processes)": [[2, "openeo.processes.apply_polygon", false]], "apply_polygon() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.apply_polygon", false]], "apply_timeseries() (in module openeo.udf.udf_signatures)": [[25, "openeo.udf.udf_signatures.apply_timeseries", false]], "apply_udf_data() (in module openeo.udf.udf_signatures)": [[25, "openeo.udf.udf_signatures.apply_udf_data", false]], "apply_vectorcube() (in module openeo.udf.udf_signatures)": [[25, "openeo.udf.udf_signatures.apply_vectorcube", false]], "arccos() (in module openeo.processes)": [[2, "openeo.processes.arccos", false]], "arcosh() (in module openeo.processes)": [[2, "openeo.processes.arcosh", false]], "arcsin() (in module openeo.processes)": [[2, "openeo.processes.arcsin", false]], "arctan() (in module openeo.processes)": [[2, "openeo.processes.arctan", false]], "arctan2() (in module openeo.processes)": [[2, "openeo.processes.arctan2", false]], "ard_normalized_radar_backscatter() (in module openeo.processes)": [[2, "openeo.processes.ard_normalized_radar_backscatter", false]], "ard_normalized_radar_backscatter() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.ard_normalized_radar_backscatter", false]], "ard_surface_reflectance() (in module openeo.processes)": [[2, "openeo.processes.ard_surface_reflectance", false]], "ard_surface_reflectance() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.ard_surface_reflectance", false]], "array (openeo.udf.xarraydatacube.xarraydatacube property)": [[0, "openeo.udf.xarraydatacube.XarrayDataCube.array", false]], "array() (openeo.api.process.parameter class method)": [[0, "openeo.api.process.Parameter.array", false]], "array_append() (in module openeo.processes)": [[2, 
"openeo.processes.array_append", false]], "array_apply() (in module openeo.processes)": [[2, "openeo.processes.array_apply", false]], "array_concat() (in module openeo.processes)": [[2, "openeo.processes.array_concat", false]], "array_contains() (in module openeo.processes)": [[2, "openeo.processes.array_contains", false]], "array_create() (in module openeo.processes)": [[2, "openeo.processes.array_create", false]], "array_create_labeled() (in module openeo.processes)": [[2, "openeo.processes.array_create_labeled", false]], "array_element() (in module openeo.processes)": [[2, "openeo.processes.array_element", false]], "array_filter() (in module openeo.processes)": [[2, "openeo.processes.array_filter", false]], "array_find() (in module openeo.processes)": [[2, "openeo.processes.array_find", false]], "array_find_label() (in module openeo.processes)": [[2, "openeo.processes.array_find_label", false]], "array_interpolate_linear() (in module openeo.processes)": [[2, "openeo.processes.array_interpolate_linear", false]], "array_labels() (in module openeo.processes)": [[2, "openeo.processes.array_labels", false]], "array_modify() (in module openeo.processes)": [[2, "openeo.processes.array_modify", false]], "arsinh() (in module openeo.processes)": [[2, "openeo.processes.arsinh", false]], "artanh() (in module openeo.processes)": [[2, "openeo.processes.artanh", false]], "as_curl() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.as_curl", false]], "assert_job_results_allclose() (in module openeo.testing.results)": [[0, "openeo.testing.results.assert_job_results_allclose", false]], "assert_user_defined_process_support() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.assert_user_defined_process_support", false]], "assert_xarray_allclose() (in module openeo.testing.results)": [[0, "openeo.testing.results.assert_xarray_allclose", false]], "assert_xarray_dataarray_allclose() (in module 
openeo.testing.results)": [[0, "openeo.testing.results.assert_xarray_dataarray_allclose", false]], "assert_xarray_dataset_allclose() (in module openeo.testing.results)": [[0, "openeo.testing.results.assert_xarray_dataset_allclose", false]], "atmospheric_correction() (in module openeo.processes)": [[2, "openeo.processes.atmospheric_correction", false]], "atmospheric_correction() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.atmospheric_correction", false]], "authenticate_basic() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.authenticate_basic", false]], "authenticate_oidc() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.authenticate_oidc", false]], "authenticate_oidc_access_token() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.authenticate_oidc_access_token", false]], "authenticate_oidc_authorization_code() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.authenticate_oidc_authorization_code", false]], "authenticate_oidc_client_credentials() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.authenticate_oidc_client_credentials", false]], "authenticate_oidc_device() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.authenticate_oidc_device", false]], "authenticate_oidc_refresh_token() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.authenticate_oidc_refresh_token", false]], "authenticate_oidc_resource_owner_password_credentials() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.authenticate_oidc_resource_owner_password_credentials", false]], "band() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.band", false]], "band_filter() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.band_filter", 
false]], "band_index() (openeo.metadata.banddimension method)": [[0, "openeo.metadata.BandDimension.band_index", false]], "band_name() (openeo.metadata.banddimension method)": [[0, "openeo.metadata.BandDimension.band_name", false]], "banddimension (class in openeo.metadata)": [[0, "openeo.metadata.BandDimension", false]], "batch job": [[5, "index-0", false], [5, "index-1", false], [5, "index-2", false], [5, "index-3", false], [5, "index-4", false], [5, "index-5", false], [5, "index-6", false], [5, "index-7", false], [5, "index-8", false]], "batchjob (class in openeo.rest.job)": [[0, "openeo.rest.job.BatchJob", false]], "bboxdict (class in openeo.util)": [[0, "openeo.util.BBoxDict", false]], "between() (in module openeo.processes)": [[2, "openeo.processes.between", false]], "boolean() (openeo.api.process.parameter class method)": [[0, "openeo.api.process.Parameter.boolean", false]], "bounding_box() (openeo.api.process.parameter class method)": [[0, "openeo.api.process.Parameter.bounding_box", false]], "build_process_dict() (in module openeo.rest.udp)": [[0, "openeo.rest.udp.build_process_dict", false]], "capabilities() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.capabilities", false]], "ceil() (in module openeo.processes)": [[2, "openeo.processes.ceil", false]], "chunk_polygon() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.chunk_polygon", false]], "chunking": [[25, "index-2", false]], "climatological_normal() (in module openeo.processes)": [[2, "openeo.processes.climatological_normal", false]], "clip() (in module openeo.processes)": [[2, "openeo.processes.clip", false]], "cloud_detection() (in module openeo.processes)": [[2, "openeo.processes.cloud_detection", false]], "collection_items() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.collection_items", false]], "collection_property() (in module openeo.rest.graph_building)": [[0, 
"openeo.rest.graph_building.collection_property", false]], "collectionmetadata (class in openeo.metadata)": [[0, "openeo.metadata.CollectionMetadata", false]], "collectionproperty (class in openeo.rest.graph_building)": [[0, "openeo.rest.graph_building.CollectionProperty", false]], "compute_and_rescale_indices() (in module openeo.extra.spectral_indices)": [[14, "openeo.extra.spectral_indices.compute_and_rescale_indices", false]], "compute_index() (in module openeo.extra.spectral_indices)": [[14, "openeo.extra.spectral_indices.compute_index", false]], "compute_indices() (in module openeo.extra.spectral_indices)": [[14, "openeo.extra.spectral_indices.compute_indices", false]], "connect() (in module openeo)": [[0, "openeo.connect", false]], "connection (class in openeo.rest.connection)": [[0, "openeo.rest.connection.Connection", false]], "constant() (in module openeo.processes)": [[2, "openeo.processes.constant", false]], "cos() (in module openeo.processes)": [[2, "openeo.processes.cos", false]], "cosh() (in module openeo.processes)": [[2, "openeo.processes.cosh", false]], "count() (in module openeo.processes)": [[2, "openeo.processes.count", false]], "count_by_status() (openeo.extra.job_management.jobdatabaseinterface method)": [[11, "openeo.extra.job_management.JobDatabaseInterface.count_by_status", false]], "count_time() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.count_time", false]], "create": [[5, "index-1", false]], "create_collection() (openeo.rest.datacube.datacube class method)": [[0, "openeo.rest.datacube.DataCube.create_collection", false]], "create_data_cube() (in module openeo.processes)": [[2, "openeo.processes.create_data_cube", false]], "create_job() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.create_job", false]], "create_job() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.create_job", false]], "create_job() (openeo.rest.mlmodel.mlmodel method)": 
[[0, "openeo.rest.mlmodel.MlModel.create_job", false]], "create_job() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.create_job", false]], "csvjobdatabase (class in openeo.extra.job_management)": [[11, "openeo.extra.job_management.CsvJobDatabase", false]], "cummax() (in module openeo.processes)": [[2, "openeo.processes.cummax", false]], "cummin() (in module openeo.processes)": [[2, "openeo.processes.cummin", false]], "cumproduct() (in module openeo.processes)": [[2, "openeo.processes.cumproduct", false]], "cumsum() (in module openeo.processes)": [[2, "openeo.processes.cumsum", false]], "datacube (class in openeo.rest.datacube)": [[0, "openeo.rest.datacube.DataCube", false]], "datacube() (openeo.api.process.parameter class method)": [[0, "openeo.api.process.Parameter.datacube", false]], "datacube_from_file() (in module openeo.rest.conversions)": [[0, "openeo.rest.conversions.datacube_from_file", false]], "datacube_from_flat_graph() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.datacube_from_flat_graph", false]], "datacube_from_json() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.datacube_from_json", false]], "datacube_from_process() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.datacube_from_process", false]], "datacube_list (openeo.udf.udf_data.udfdata property)": [[0, "openeo.udf.udf_data.UdfData.datacube_list", false]], "datacube_plot() (in module openeo.rest.conversions)": [[0, "openeo.rest.conversions.datacube_plot", false]], "datacube_to_file() (in module openeo.rest.conversions)": [[0, "openeo.rest.conversions.datacube_to_file", false]], "date() (openeo.api.process.parameter class method)": [[0, "openeo.api.process.Parameter.date", false]], "date_between() (in module openeo.processes)": [[2, "openeo.processes.date_between", false]], "date_difference() (in module openeo.processes)": [[2, 
"openeo.processes.date_difference", false]], "date_shift() (in module openeo.processes)": [[2, "openeo.processes.date_shift", false]], "date_time() (openeo.api.process.parameter class method)": [[0, "openeo.api.process.Parameter.date_time", false]], "delete() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.delete", false]], "delete() (openeo.rest.udp.restuserdefinedprocess method)": [[0, "openeo.rest.udp.RESTUserDefinedProcess.delete", false]], "delete() (openeo.rest.userfile.userfile method)": [[0, "openeo.rest.userfile.UserFile.delete", false]], "delete_job() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.delete_job", false]], "describe() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.describe", false]], "describe() (openeo.rest.udp.restuserdefinedprocess method)": [[0, "openeo.rest.udp.RESTUserDefinedProcess.describe", false]], "describe_account() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.describe_account", false]], "describe_collection() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.describe_collection", false]], "describe_job() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.describe_job", false]], "describe_process() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.describe_process", false]], "dimension_labels() (in module openeo.processes)": [[2, "openeo.processes.dimension_labels", false]], "dimension_labels() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.dimension_labels", false]], "divide() (in module openeo.processes)": [[2, "openeo.processes.divide", false]], "divide() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.divide", false]], "download() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.download", false]], "download() (openeo.rest.datacube.datacube method)": 
[[0, "openeo.rest.datacube.DataCube.download", false]], "download() (openeo.rest.job.resultasset method)": [[0, "openeo.rest.job.ResultAsset.download", false]], "download() (openeo.rest.userfile.userfile method)": [[0, "openeo.rest.userfile.UserFile.download", false]], "download() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.download", false]], "download_file() (openeo.rest.job.jobresults method)": [[0, "openeo.rest.job.JobResults.download_file", false]], "download_files() (openeo.rest.job.jobresults method)": [[0, "openeo.rest.job.JobResults.download_files", false]], "download_result() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.download_result", false]], "download_results() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.download_results", false]], "drop_dimension() (in module openeo.processes)": [[2, "openeo.processes.drop_dimension", false]], "drop_dimension() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.drop_dimension", false]], "e() (in module openeo.processes)": [[2, "openeo.processes.e", false]], "ensure_job_dir_exists() (openeo.extra.job_management.multibackendjobmanager method)": [[11, "openeo.extra.job_management.MultiBackendJobManager.ensure_job_dir_exists", false]], "eq() (in module openeo.processes)": [[2, "openeo.processes.eq", false]], "estimate() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.estimate", false]], "estimate_job() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.estimate_job", false]], "execute() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.execute", false]], "execute() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.execute", false]], "execute() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.execute", false]], "execute_batch() (openeo.rest.datacube.datacube method)": [[0, 
"openeo.rest.datacube.DataCube.execute_batch", false]], "execute_batch() (openeo.rest.mlmodel.mlmodel method)": [[0, "openeo.rest.mlmodel.MlModel.execute_batch", false]], "execute_batch() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.execute_batch", false]], "execute_local_udf() (in module openeo.udf.run_code)": [[0, "openeo.udf.run_code.execute_local_udf", false]], "execute_local_udf() (openeo.rest.datacube.datacube static method)": [[0, "openeo.rest.datacube.DataCube.execute_local_udf", false]], "exists() (openeo.extra.job_management.jobdatabaseinterface method)": [[11, "openeo.extra.job_management.JobDatabaseInterface.exists", false]], "exp() (in module openeo.processes)": [[2, "openeo.processes.exp", false]], "extract_udf_dependencies() (in module openeo.udf.run_code)": [[0, "openeo.udf.run_code.extract_udf_dependencies", false]], "extrema() (in module openeo.processes)": [[2, "openeo.processes.extrema", false]], "feature_collection_list (openeo.udf.udf_data.udfdata property)": [[0, "openeo.udf.udf_data.UdfData.feature_collection_list", false]], "filter_bands() (in module openeo.processes)": [[2, "openeo.processes.filter_bands", false]], "filter_bands() (openeo.metadata.banddimension method)": [[0, "openeo.metadata.BandDimension.filter_bands", false]], "filter_bands() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.filter_bands", false]], "filter_bands() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.filter_bands", false]], "filter_bbox() (in module openeo.processes)": [[2, "openeo.processes.filter_bbox", false]], "filter_bbox() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.filter_bbox", false]], "filter_bbox() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.filter_bbox", false]], "filter_labels() (in module openeo.processes)": [[2, "openeo.processes.filter_labels", false]], 
"filter_labels() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.filter_labels", false]], "filter_labels() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.filter_labels", false]], "filter_spatial() (in module openeo.processes)": [[2, "openeo.processes.filter_spatial", false]], "filter_spatial() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.filter_spatial", false]], "filter_temporal() (in module openeo.processes)": [[2, "openeo.processes.filter_temporal", false]], "filter_temporal() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.filter_temporal", false]], "filter_vector() (in module openeo.processes)": [[2, "openeo.processes.filter_vector", false]], "filter_vector() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.filter_vector", false]], "first() (in module openeo.processes)": [[2, "openeo.processes.first", false]], "fit_class_random_forest() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.fit_class_random_forest", false]], "fit_curve() (in module openeo.processes)": [[2, "openeo.processes.fit_curve", false]], "fit_curve() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.fit_curve", false]], "fit_regr_random_forest() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.fit_regr_random_forest", false]], "flat_graph() (openeo.internal.graph_building.pgnode method)": [[0, "openeo.internal.graph_building.PGNode.flat_graph", false]], "flat_graph() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.flat_graph", false]], "flat_graph() (openeo.rest.mlmodel.mlmodel method)": [[0, "openeo.rest.mlmodel.MlModel.flat_graph", false]], "flat_graph() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.flat_graph", false]], "flatgraphablemixin (class in 
openeo.internal.graph_building)": [[0, "openeo.internal.graph_building.FlatGraphableMixin", false]], "flatten_dimensions() (in module openeo.processes)": [[2, "openeo.processes.flatten_dimensions", false]], "flatten_dimensions() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.flatten_dimensions", false]], "floor() (in module openeo.processes)": [[2, "openeo.processes.floor", false]], "from_dict() (openeo.udf.udf_data.udfdata class method)": [[0, "openeo.udf.udf_data.UdfData.from_dict", false]], "from_dict() (openeo.udf.xarraydatacube.xarraydatacube class method)": [[0, "openeo.udf.xarraydatacube.XarrayDataCube.from_dict", false]], "from_dict() (openeo.util.bboxdict class method)": [[0, "openeo.util.BBoxDict.from_dict", false]], "from_file() (openeo.rest._datacube.udf class method)": [[0, "openeo.rest._datacube.UDF.from_file", false]], "from_file() (openeo.udf.xarraydatacube.xarraydatacube class method)": [[0, "openeo.udf.xarraydatacube.XarrayDataCube.from_file", false]], "from_flat_graph() (openeo.internal.graph_building.pgnode static method)": [[0, "openeo.internal.graph_building.PGNode.from_flat_graph", false]], "from_metadata() (openeo.rest.userfile.userfile class method)": [[0, "openeo.rest.userfile.UserFile.from_metadata", false]], "from_sequence() (openeo.util.bboxdict class method)": [[0, "openeo.util.BBoxDict.from_sequence", false]], "from_url() (openeo.rest._datacube.udf class method)": [[0, "openeo.rest._datacube.UDF.from_url", false]], "geojson() (openeo.api.process.parameter class method)": [[0, "openeo.api.process.Parameter.geojson", false]], "get_array() (openeo.udf.xarraydatacube.xarraydatacube method)": [[0, "openeo.udf.xarraydatacube.XarrayDataCube.get_array", false]], "get_asset() (openeo.rest.job.jobresults method)": [[0, "openeo.rest.job.JobResults.get_asset", false]], "get_assets() (openeo.rest.job.jobresults method)": [[0, "openeo.rest.job.JobResults.get_assets", false]], "get_by_status() 
(openeo.extra.job_management.jobdatabaseinterface method)": [[11, "openeo.extra.job_management.JobDatabaseInterface.get_by_status", false]], "get_datacube_list() (openeo.udf.udf_data.udfdata method)": [[0, "openeo.udf.udf_data.UdfData.get_datacube_list", false]], "get_error_log_path() (openeo.extra.job_management.multibackendjobmanager method)": [[11, "openeo.extra.job_management.MultiBackendJobManager.get_error_log_path", false]], "get_feature_collection_list() (openeo.udf.udf_data.udfdata method)": [[0, "openeo.udf.udf_data.UdfData.get_feature_collection_list", false]], "get_file() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.get_file", false]], "get_job_dir() (openeo.extra.job_management.multibackendjobmanager method)": [[11, "openeo.extra.job_management.MultiBackendJobManager.get_job_dir", false]], "get_job_metadata_path() (openeo.extra.job_management.multibackendjobmanager method)": [[11, "openeo.extra.job_management.MultiBackendJobManager.get_job_metadata_path", false]], "get_metadata() (openeo.rest.job.jobresults method)": [[0, "openeo.rest.job.JobResults.get_metadata", false]], "get_path() (openeo.testing.testdataloader method)": [[0, "openeo.testing.TestDataLoader.get_path", false]], "get_result() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.get_result", false]], "get_results() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.get_results", false]], "get_results_metadata_url() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.get_results_metadata_url", false]], "get_run_udf_callback() (openeo.rest._datacube.udf method)": [[0, "openeo.rest._datacube.UDF.get_run_udf_callback", false]], "get_structured_data_list() (openeo.udf.udf_data.udfdata method)": [[0, "openeo.udf.udf_data.UdfData.get_structured_data_list", false]], "graph_add_node() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.graph_add_node", false]], "gt() (in module 
openeo.processes)": [[2, "openeo.processes.gt", false]], "gte() (in module openeo.processes)": [[2, "openeo.processes.gte", false]], "href (openeo.rest.job.resultasset attribute)": [[0, "openeo.rest.job.ResultAsset.href", false]], "if_() (in module openeo.processes)": [[2, "openeo.processes.if_", false]], "imagecollection() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.imagecollection", false]], "inspect() (in module openeo.processes)": [[2, "openeo.processes.inspect", false]], "inspect() (in module openeo.udf.debug)": [[0, "openeo.udf.debug.inspect", false]], "int() (in module openeo.processes)": [[2, "openeo.processes.int", false]], "integer() (openeo.api.process.parameter class method)": [[0, "openeo.api.process.Parameter.integer", false]], "invalidtimeseriesexception": [[0, "openeo.rest.conversions.InvalidTimeSeriesException", false]], "is_infinite() (in module openeo.processes)": [[2, "openeo.processes.is_infinite", false]], "is_nan() (in module openeo.processes)": [[2, "openeo.processes.is_nan", false]], "is_nodata() (in module openeo.processes)": [[2, "openeo.processes.is_nodata", false]], "is_valid() (in module openeo.processes)": [[2, "openeo.processes.is_valid", false]], "job": [[5, "index-0", false]], "job() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.job", false]], "job_id (openeo.rest.job.batchjob attribute)": [[0, "openeo.rest.job.BatchJob.job_id", false]], "job_logs() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.job_logs", false]], "job_results() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.job_results", false]], "jobdatabaseinterface (class in openeo.extra.job_management)": [[11, "openeo.extra.job_management.JobDatabaseInterface", false]], "jobresults (class in openeo.rest.job)": [[0, "openeo.rest.job.JobResults", false]], "last() (in module openeo.processes)": [[2, "openeo.processes.last", 
false]], "linear_scale_range() (in module openeo.processes)": [[2, "openeo.processes.linear_scale_range", false]], "linear_scale_range() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.linear_scale_range", false]], "list_collection_ids() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.list_collection_ids", false]], "list_collections() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.list_collections", false]], "list_file_formats() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.list_file_formats", false]], "list_file_types() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.list_file_types", false]], "list_files() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.list_files", false]], "list_indices() (in module openeo.extra.spectral_indices)": [[14, "openeo.extra.spectral_indices.list_indices", false]], "list_jobs() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.list_jobs", false]], "list_processes() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.list_processes", false]], "list_results() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.list_results", false]], "list_service_types() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.list_service_types", false]], "list_services() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.list_services", false]], "list_udf_runtimes() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.list_udf_runtimes", false]], "list_user_defined_processes() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.list_user_defined_processes", false]], "listing": [[5, "index-3", false]], "ln() (in module 
openeo.processes)": [[2, "openeo.processes.ln", false]], "ln() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.ln", false]], "load_bytes() (openeo.rest.job.resultasset method)": [[0, "openeo.rest.job.ResultAsset.load_bytes", false]], "load_collection() (in module openeo.processes)": [[2, "openeo.processes.load_collection", false]], "load_collection() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.load_collection", false]], "load_collection() (openeo.rest.datacube.datacube class method)": [[0, "openeo.rest.datacube.DataCube.load_collection", false]], "load_disk_collection() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.load_disk_collection", false]], "load_disk_collection() (openeo.rest.datacube.datacube class method)": [[0, "openeo.rest.datacube.DataCube.load_disk_collection", false]], "load_geojson() (in module openeo.processes)": [[2, "openeo.processes.load_geojson", false]], "load_geojson() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.load_geojson", false]], "load_geojson() (openeo.rest.vectorcube.vectorcube class method)": [[0, "openeo.rest.vectorcube.VectorCube.load_geojson", false]], "load_json() (openeo.rest.job.resultasset method)": [[0, "openeo.rest.job.ResultAsset.load_json", false]], "load_json() (openeo.testing.testdataloader method)": [[0, "openeo.testing.TestDataLoader.load_json", false]], "load_json_resource() (in module openeo.util)": [[0, "openeo.util.load_json_resource", false]], "load_ml_model() (in module openeo.processes)": [[2, "openeo.processes.load_ml_model", false]], "load_ml_model() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.load_ml_model", false]], "load_ml_model() (openeo.rest.mlmodel.mlmodel static method)": [[0, "openeo.rest.mlmodel.MlModel.load_ml_model", false]], "load_result() (in module openeo.processes)": [[2, "openeo.processes.load_result", 
false]], "load_result() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.load_result", false]], "load_stac() (in module openeo.processes)": [[2, "openeo.processes.load_stac", false]], "load_stac() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.load_stac", false]], "load_stac() (openeo.rest.datacube.datacube class method)": [[0, "openeo.rest.datacube.DataCube.load_stac", false]], "load_stac_from_job() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.load_stac_from_job", false]], "load_uploaded_files() (in module openeo.processes)": [[2, "openeo.processes.load_uploaded_files", false]], "load_url() (in module openeo.processes)": [[2, "openeo.processes.load_url", false]], "load_url() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.load_url", false]], "load_url() (openeo.rest.vectorcube.vectorcube class method)": [[0, "openeo.rest.vectorcube.VectorCube.load_url", false]], "log() (in module openeo.processes)": [[2, "openeo.processes.log", false]], "log10() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.log10", false]], "log2() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.log2", false]], "logarithm() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.logarithm", false]], "logentry (class in openeo.api.logs)": [[0, "openeo.api.logs.LogEntry", false]], "logical_and() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.logical_and", false]], "logical_or() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.logical_or", false]], "logs": [[5, "index-7", false]], "logs() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.logs", false]], "lt() (in module openeo.processes)": [[2, "openeo.processes.lt", false]], "lte() (in module openeo.processes)": [[2, "openeo.processes.lte", 
false]], "mask() (in module openeo.processes)": [[2, "openeo.processes.mask", false]], "mask() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.mask", false]], "mask_polygon() (in module openeo.processes)": [[2, "openeo.processes.mask_polygon", false]], "mask_polygon() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.mask_polygon", false]], "max() (in module openeo.processes)": [[2, "openeo.processes.max", false]], "max_time() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.max_time", false]], "mean() (in module openeo.processes)": [[2, "openeo.processes.mean", false]], "mean_time() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.mean_time", false]], "median() (in module openeo.processes)": [[2, "openeo.processes.median", false]], "median_time() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.median_time", false]], "merge() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.merge", false]], "merge_cubes() (in module openeo.processes)": [[2, "openeo.processes.merge_cubes", false]], "merge_cubes() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.merge_cubes", false]], "metadata (openeo.rest.job.resultasset attribute)": [[0, "openeo.rest.job.ResultAsset.metadata", false]], "min() (in module openeo.processes)": [[2, "openeo.processes.min", false]], "min_time() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.min_time", false]], "mlmodel (class in openeo.rest.mlmodel)": [[0, "openeo.rest.mlmodel.MlModel", false]], "mod() (in module openeo.processes)": [[2, "openeo.processes.mod", false]], "module": [[0, "module-openeo.api.logs", false], [0, "module-openeo.api.process", false], [0, "module-openeo.internal.graph_building", false], [0, "module-openeo.metadata", false], [0, "module-openeo.rest._datacube", false], [0, "module-openeo.rest.connection", 
false], [0, "module-openeo.rest.conversions", false], [0, "module-openeo.rest.datacube", false], [0, "module-openeo.rest.graph_building", false], [0, "module-openeo.rest.job", false], [0, "module-openeo.rest.mlmodel", false], [0, "module-openeo.rest.multiresult", false], [0, "module-openeo.rest.udp", false], [0, "module-openeo.rest.userfile", false], [0, "module-openeo.rest.vectorcube", false], [0, "module-openeo.testing", false], [0, "module-openeo.testing.results", false], [0, "module-openeo.udf.debug", false], [0, "module-openeo.udf.run_code", false], [0, "module-openeo.udf.structured_data", false], [0, "module-openeo.udf.udf_data", false], [0, "module-openeo.udf.xarraydatacube", false], [0, "module-openeo.util", false], [2, "module-openeo.processes", false], [14, "module-openeo.extra.spectral_indices", false], [25, "module-openeo.udf.udf_signatures", false]], "multibackendjobmanager (class in openeo.extra.job_management)": [[11, "openeo.extra.job_management.MultiBackendJobManager", false]], "multiply() (in module openeo.processes)": [[2, "openeo.processes.multiply", false]], "multiply() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.multiply", false]], "multiresult (class in openeo.rest.multiresult)": [[0, "openeo.rest.multiresult.MultiResult", false]], "name (openeo.rest.job.resultasset attribute)": [[0, "openeo.rest.job.ResultAsset.name", false]], "nan() (in module openeo.processes)": [[2, "openeo.processes.nan", false]], "ndvi() (in module openeo.processes)": [[2, "openeo.processes.ndvi", false]], "ndvi() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.ndvi", false]], "neq() (in module openeo.processes)": [[2, "openeo.processes.neq", false]], "normalize_crs() (in module openeo.util)": [[0, "openeo.util.normalize_crs", false]], "normalize_log_level() (in module openeo.api.logs)": [[0, "openeo.api.logs.normalize_log_level", false]], "normalized_difference() (in module openeo.processes)": [[2, 
"openeo.processes.normalized_difference", false]], "normalized_difference() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.normalized_difference", false]], "not_() (in module openeo.processes)": [[2, "openeo.processes.not_", false]], "number() (openeo.api.process.parameter class method)": [[0, "openeo.api.process.Parameter.number", false]], "object": [[5, "index-2", false]], "object() (openeo.api.process.parameter class method)": [[0, "openeo.api.process.Parameter.object", false]], "on_job_cancel() (openeo.extra.job_management.multibackendjobmanager method)": [[11, "openeo.extra.job_management.MultiBackendJobManager.on_job_cancel", false]], "on_job_done() (openeo.extra.job_management.multibackendjobmanager method)": [[11, "openeo.extra.job_management.MultiBackendJobManager.on_job_done", false]], "on_job_error() (openeo.extra.job_management.multibackendjobmanager method)": [[11, "openeo.extra.job_management.MultiBackendJobManager.on_job_error", false]], "openeo.api.logs": [[0, "module-openeo.api.logs", false]], "openeo.api.process": [[0, "module-openeo.api.process", false]], "openeo.extra.spectral_indices": [[14, "module-openeo.extra.spectral_indices", false]], "openeo.internal.graph_building": [[0, "module-openeo.internal.graph_building", false]], "openeo.metadata": [[0, "module-openeo.metadata", false]], "openeo.processes": [[2, "module-openeo.processes", false]], "openeo.rest._datacube": [[0, "module-openeo.rest._datacube", false]], "openeo.rest.connection": [[0, "module-openeo.rest.connection", false]], "openeo.rest.conversions": [[0, "module-openeo.rest.conversions", false]], "openeo.rest.datacube": [[0, "module-openeo.rest.datacube", false]], "openeo.rest.graph_building": [[0, "module-openeo.rest.graph_building", false]], "openeo.rest.job": [[0, "module-openeo.rest.job", false]], "openeo.rest.mlmodel": [[0, "module-openeo.rest.mlmodel", false]], "openeo.rest.multiresult": [[0, "module-openeo.rest.multiresult", false]], 
"openeo.rest.udp": [[0, "module-openeo.rest.udp", false]], "openeo.rest.userfile": [[0, "module-openeo.rest.userfile", false]], "openeo.rest.vectorcube": [[0, "module-openeo.rest.vectorcube", false]], "openeo.testing": [[0, "module-openeo.testing", false]], "openeo.testing.results": [[0, "module-openeo.testing.results", false]], "openeo.udf.debug": [[0, "module-openeo.udf.debug", false]], "openeo.udf.run_code": [[0, "module-openeo.udf.run_code", false]], "openeo.udf.structured_data": [[0, "module-openeo.udf.structured_data", false]], "openeo.udf.udf_data": [[0, "module-openeo.udf.udf_data", false]], "openeo.udf.udf_signatures": [[25, "module-openeo.udf.udf_signatures", false]], "openeo.udf.xarraydatacube": [[0, "module-openeo.udf.xarraydatacube", false]], "openeo.util": [[0, "module-openeo.util", false]], "or_() (in module openeo.processes)": [[2, "openeo.processes.or_", false]], "order() (in module openeo.processes)": [[2, "openeo.processes.order", false]], "parameter (class in openeo.api.process)": [[0, "openeo.api.process.Parameter", false]], "parquetjobdatabase (class in openeo.extra.job_management)": [[11, "openeo.extra.job_management.ParquetJobDatabase", false]], "persist() (openeo.extra.job_management.jobdatabaseinterface method)": [[11, "openeo.extra.job_management.JobDatabaseInterface.persist", false]], "pgnode (class in openeo.internal.graph_building)": [[0, "openeo.internal.graph_building.PGNode", false]], "pi() (in module openeo.processes)": [[2, "openeo.processes.pi", false]], "plot() (openeo.udf.xarraydatacube.xarraydatacube method)": [[0, "openeo.udf.xarraydatacube.XarrayDataCube.plot", false]], "polling loop": [[5, "index-6", false]], "polygonal_histogram_timeseries() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.polygonal_histogram_timeseries", false]], "polygonal_mean_timeseries() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.polygonal_mean_timeseries", false]], 
"polygonal_median_timeseries() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.polygonal_median_timeseries", false]], "polygonal_standarddeviation_timeseries() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.polygonal_standarddeviation_timeseries", false]], "power() (in module openeo.processes)": [[2, "openeo.processes.power", false]], "power() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.power", false]], "predict_curve() (in module openeo.processes)": [[2, "openeo.processes.predict_curve", false]], "predict_curve() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.predict_curve", false]], "predict_random_forest() (in module openeo.processes)": [[2, "openeo.processes.predict_random_forest", false]], "predict_random_forest() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.predict_random_forest", false]], "preview() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.preview", false]], "print_json() (openeo.internal.graph_building.flatgraphablemixin method)": [[0, "openeo.internal.graph_building.FlatGraphableMixin.print_json", false]], "print_json() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.print_json", false]], "print_json() (openeo.rest.mlmodel.mlmodel method)": [[0, "openeo.rest.mlmodel.MlModel.print_json", false]], "print_json() (openeo.rest.multiresult.multiresult method)": [[0, "openeo.rest.multiresult.MultiResult.print_json", false]], "print_json() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.print_json", false]], "process() (in module openeo.processes)": [[0, "openeo.processes.process", false]], "process() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.process", false]], "process() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.process", false]], 
"process_with_node() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.process_with_node", false]], "processbasedjobcreator (class in openeo.extra.job_management)": [[11, "openeo.extra.job_management.ProcessBasedJobCreator", false]], "processbuilder (class in openeo.processes)": [[2, "openeo.processes.ProcessBuilder", false]], "product() (in module openeo.processes)": [[2, "openeo.processes.product", false]], "quantiles() (in module openeo.processes)": [[2, "openeo.processes.quantiles", false]], "raster_cube() (openeo.api.process.parameter class method)": [[0, "openeo.api.process.Parameter.raster_cube", false]], "raster_to_vector() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.raster_to_vector", false]], "rearrange() (in module openeo.processes)": [[2, "openeo.processes.rearrange", false]], "reduce_bands() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.reduce_bands", false]], "reduce_bands_udf() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.reduce_bands_udf", false]], "reduce_dimension() (in module openeo.processes)": [[2, "openeo.processes.reduce_dimension", false]], "reduce_dimension() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.reduce_dimension", false]], "reduce_spatial() (in module openeo.processes)": [[2, "openeo.processes.reduce_spatial", false]], "reduce_spatial() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.reduce_spatial", false]], "reduce_temporal() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.reduce_temporal", false]], "reduce_temporal_simple() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.reduce_temporal_simple", false]], "reduce_temporal_udf() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.reduce_temporal_udf", false]], "reduce_tiles_over_time() (openeo.rest.datacube.datacube 
method)": [[0, "openeo.rest.datacube.DataCube.reduce_tiles_over_time", false]], "remove_service() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.remove_service", false]], "rename() (openeo.metadata.banddimension method)": [[0, "openeo.metadata.BandDimension.rename", false]], "rename() (openeo.metadata.spatialdimension method)": [[0, "openeo.metadata.SpatialDimension.rename", false]], "rename() (openeo.metadata.temporaldimension method)": [[0, "openeo.metadata.TemporalDimension.rename", false]], "rename_dimension() (in module openeo.processes)": [[2, "openeo.processes.rename_dimension", false]], "rename_dimension() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.rename_dimension", false]], "rename_labels() (in module openeo.processes)": [[2, "openeo.processes.rename_labels", false]], "rename_labels() (openeo.metadata.banddimension method)": [[0, "openeo.metadata.BandDimension.rename_labels", false]], "rename_labels() (openeo.metadata.temporaldimension method)": [[0, "openeo.metadata.TemporalDimension.rename_labels", false]], "rename_labels() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.rename_labels", false]], "request() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.request", false]], "resample_cube_spatial() (in module openeo.processes)": [[2, "openeo.processes.resample_cube_spatial", false]], "resample_cube_spatial() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.resample_cube_spatial", false]], "resample_cube_temporal() (in module openeo.processes)": [[2, "openeo.processes.resample_cube_temporal", false]], "resample_cube_temporal() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.resample_cube_temporal", false]], "resample_spatial() (in module openeo.processes)": [[2, "openeo.processes.resample_spatial", false]], "resample_spatial() (openeo.rest.datacube.datacube 
method)": [[0, "openeo.rest.datacube.DataCube.resample_spatial", false]], "resolution_merge() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.resolution_merge", false]], "restjob (class in openeo.rest.job)": [[0, "openeo.rest.job.RESTJob", false]], "restuserdefinedprocess (class in openeo.rest.udp)": [[0, "openeo.rest.udp.RESTUserDefinedProcess", false]], "result_node() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.result_node", false]], "result_node() (openeo.rest.mlmodel.mlmodel method)": [[0, "openeo.rest.mlmodel.MlModel.result_node", false]], "result_node() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.result_node", false]], "resultasset (class in openeo.rest.job)": [[0, "openeo.rest.job.ResultAsset", false]], "results": [[5, "index-8", false]], "round() (in module openeo.processes)": [[2, "openeo.processes.round", false]], "run_jobs() (openeo.extra.job_management.multibackendjobmanager method)": [[11, "openeo.extra.job_management.MultiBackendJobManager.run_jobs", false]], "run_synchronous() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.run_synchronous", false]], "run_udf() (in module openeo.processes)": [[2, "openeo.processes.run_udf", false]], "run_udf() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.run_udf", false]], "run_udf_externally() (in module openeo.processes)": [[2, "openeo.processes.run_udf_externally", false]], "sar_backscatter() (in module openeo.processes)": [[2, "openeo.processes.sar_backscatter", false]], "sar_backscatter() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.sar_backscatter", false]], "save_ml_model() (openeo.rest.mlmodel.mlmodel method)": [[0, "openeo.rest.mlmodel.MlModel.save_ml_model", false]], "save_result() (in module openeo.processes)": [[2, "openeo.processes.save_result", false]], "save_result() (openeo.rest.datacube.datacube method)": [[0, 
"openeo.rest.datacube.DataCube.save_result", false]], "save_result() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.save_result", false]], "save_to_file() (openeo.udf.xarraydatacube.xarraydatacube method)": [[0, "openeo.udf.xarraydatacube.XarrayDataCube.save_to_file", false]], "save_user_defined_process() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.save_user_defined_process", false]], "save_user_defined_process() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.save_user_defined_process", false]], "sd() (in module openeo.processes)": [[2, "openeo.processes.sd", false]], "send_job() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.send_job", false]], "send_job() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.send_job", false]], "service() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.service", false]], "set_datacube_list() (openeo.udf.udf_data.udfdata method)": [[0, "openeo.udf.udf_data.UdfData.set_datacube_list", false]], "set_structured_data_list() (openeo.udf.udf_data.udfdata method)": [[0, "openeo.udf.udf_data.UdfData.set_structured_data_list", false]], "sgn() (in module openeo.processes)": [[2, "openeo.processes.sgn", false]], "sin() (in module openeo.processes)": [[2, "openeo.processes.sin", false]], "sinh() (in module openeo.processes)": [[2, "openeo.processes.sinh", false]], "sort() (in module openeo.processes)": [[2, "openeo.processes.sort", false]], "spatial_extent() (openeo.api.process.parameter class method)": [[0, "openeo.api.process.Parameter.spatial_extent", false]], "spatialdimension (class in openeo.metadata)": [[0, "openeo.metadata.SpatialDimension", false]], "sqrt() (in module openeo.processes)": [[2, "openeo.processes.sqrt", false]], "start": [[5, "index-4", false]], "start() (openeo.rest.job.batchjob method)": [[0, 
"openeo.rest.job.BatchJob.start", false]], "start_and_wait() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.start_and_wait", false]], "start_job() (openeo.extra.job_management.processbasedjobcreator method)": [[11, "openeo.extra.job_management.ProcessBasedJobCreator.start_job", false]], "start_job() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.start_job", false]], "start_job_thread() (openeo.extra.job_management.multibackendjobmanager method)": [[11, "openeo.extra.job_management.MultiBackendJobManager.start_job_thread", false]], "status": [[5, "index-5", false]], "status() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.status", false]], "stop() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.stop", false]], "stop_job() (openeo.rest.job.batchjob method)": [[0, "openeo.rest.job.BatchJob.stop_job", false]], "stop_job_thread() (openeo.extra.job_management.multibackendjobmanager method)": [[11, "openeo.extra.job_management.MultiBackendJobManager.stop_job_thread", false]], "store() (openeo.rest.udp.restuserdefinedprocess method)": [[0, "openeo.rest.udp.RESTUserDefinedProcess.store", false]], "string() (openeo.api.process.parameter class method)": [[0, "openeo.api.process.Parameter.string", false]], "structured_data_list (openeo.udf.udf_data.udfdata property)": [[0, "openeo.udf.udf_data.UdfData.structured_data_list", false]], "structureddata (class in openeo.udf.structured_data)": [[0, "openeo.udf.structured_data.StructuredData", false]], "subtract() (in module openeo.processes)": [[2, "openeo.processes.subtract", false]], "subtract() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.subtract", false]], "sum() (in module openeo.processes)": [[2, "openeo.processes.sum", false]], "tan() (in module openeo.processes)": [[2, "openeo.processes.tan", false]], "tanh() (in module openeo.processes)": [[2, "openeo.processes.tanh", false]], "temporal_interval() 
(openeo.api.process.parameter class method)": [[0, "openeo.api.process.Parameter.temporal_interval", false]], "temporaldimension (class in openeo.metadata)": [[0, "openeo.metadata.TemporalDimension", false]], "testdataloader (class in openeo.testing)": [[0, "openeo.testing.TestDataLoader", false]], "text_begins() (in module openeo.processes)": [[2, "openeo.processes.text_begins", false]], "text_concat() (in module openeo.processes)": [[2, "openeo.processes.text_concat", false]], "text_contains() (in module openeo.processes)": [[2, "openeo.processes.text_contains", false]], "text_ends() (in module openeo.processes)": [[2, "openeo.processes.text_ends", false]], "this (in module openeo.rest.datacube)": [[0, "openeo.rest.datacube.THIS", false]], "timeseries_json_to_pandas() (in module openeo.rest.conversions)": [[0, "openeo.rest.conversions.timeseries_json_to_pandas", false]], "to_bbox_dict() (in module openeo.util)": [[0, "openeo.util.to_bbox_dict", false]], "to_dict() (openeo.api.process.parameter method)": [[0, "openeo.api.process.Parameter.to_dict", false]], "to_dict() (openeo.internal.graph_building.pgnode method)": [[0, "openeo.internal.graph_building.PGNode.to_dict", false]], "to_dict() (openeo.rest.userfile.userfile method)": [[0, "openeo.rest.userfile.UserFile.to_dict", false]], "to_dict() (openeo.udf.udf_data.udfdata method)": [[0, "openeo.udf.udf_data.UdfData.to_dict", false]], "to_dict() (openeo.udf.xarraydatacube.xarraydatacube method)": [[0, "openeo.udf.xarraydatacube.XarrayDataCube.to_dict", false]], "to_json() (openeo.internal.graph_building.flatgraphablemixin method)": [[0, "openeo.internal.graph_building.FlatGraphableMixin.to_json", false]], "to_json() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.to_json", false]], "to_json() (openeo.rest.mlmodel.mlmodel method)": [[0, "openeo.rest.mlmodel.MlModel.to_json", false]], "to_json() (openeo.rest.multiresult.multiresult method)": [[0, 
"openeo.rest.multiresult.MultiResult.to_json", false]], "to_json() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.to_json", false]], "to_process_graph_argument() (openeo.internal.graph_building.pgnode static method)": [[0, "openeo.internal.graph_building.PGNode.to_process_graph_argument", false]], "trim_cube() (in module openeo.processes)": [[2, "openeo.processes.trim_cube", false]], "udf": [[25, "index-1", false]], "udf (class in openeo.rest._datacube)": [[0, "openeo.rest._datacube.UDF", false]], "udfdata (class in openeo.udf.udf_data)": [[0, "openeo.udf.udf_data.UdfData", false]], "unflatten_dimension() (in module openeo.processes)": [[2, "openeo.processes.unflatten_dimension", false]], "unflatten_dimension() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.unflatten_dimension", false]], "update() (openeo.rest.udp.restuserdefinedprocess method)": [[0, "openeo.rest.udp.RESTUserDefinedProcess.update", false]], "update_arguments() (openeo.internal.graph_building.pgnode method)": [[0, "openeo.internal.graph_building.PGNode.update_arguments", false]], "upload() (openeo.rest.userfile.userfile method)": [[0, "openeo.rest.userfile.UserFile.upload", false]], "upload_file() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.upload_file", false]], "user-defined functions": [[25, "index-0", false]], "user_context (openeo.udf.udf_data.udfdata property)": [[0, "openeo.udf.udf_data.UdfData.user_context", false]], "user_defined_process() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.user_defined_process", false]], "user_jobs() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.user_jobs", false]], "userfile (class in openeo.rest.userfile)": [[0, "openeo.rest.userfile.UserFile", false]], "validate() (openeo.rest.datacube.datacube method)": [[0, "openeo.rest.datacube.DataCube.validate", false]], 
"validate_process_graph() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.validate_process_graph", false]], "variance() (in module openeo.processes)": [[2, "openeo.processes.variance", false]], "vector_buffer() (in module openeo.processes)": [[2, "openeo.processes.vector_buffer", false]], "vector_reproject() (in module openeo.processes)": [[2, "openeo.processes.vector_reproject", false]], "vector_to_random_points() (in module openeo.processes)": [[2, "openeo.processes.vector_to_random_points", false]], "vector_to_raster() (openeo.rest.vectorcube.vectorcube method)": [[0, "openeo.rest.vectorcube.VectorCube.vector_to_raster", false]], "vector_to_regular_points() (in module openeo.processes)": [[2, "openeo.processes.vector_to_regular_points", false]], "vectorcube (class in openeo.rest.vectorcube)": [[0, "openeo.rest.vectorcube.VectorCube", false]], "vectorcube_from_paths() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.vectorcube_from_paths", false]], "version_discovery() (openeo.rest.connection.connection class method)": [[0, "openeo.rest.connection.Connection.version_discovery", false]], "version_info() (openeo.rest.connection.connection method)": [[0, "openeo.rest.connection.Connection.version_info", false]], "walk_nodes() (openeo.internal.graph_building.pgnode method)": [[0, "openeo.internal.graph_building.PGNode.walk_nodes", false]], "xarraydatacube (class in openeo.udf.xarraydatacube)": [[0, "openeo.udf.xarraydatacube.XarrayDataCube", false]], "xor() (in module openeo.processes)": [[2, "openeo.processes.xor", false]]}, "objects": {"openeo": [[0, 2, 1, "", "connect"], [0, 0, 0, "-", "metadata"], [2, 0, 0, "-", "processes"], [0, 0, 0, "-", "testing"], [0, 0, 0, "-", "util"]], "openeo.api": [[0, 0, 0, "-", "logs"], [0, 0, 0, "-", "process"]], "openeo.api.logs": [[0, 1, 1, "", "LogEntry"], [0, 2, 1, "", "normalize_log_level"]], "openeo.api.process": [[0, 1, 1, "", "Parameter"]], 
"openeo.api.process.Parameter": [[0, 3, 1, "", "array"], [0, 3, 1, "", "boolean"], [0, 3, 1, "", "bounding_box"], [0, 3, 1, "", "datacube"], [0, 3, 1, "", "date"], [0, 3, 1, "", "date_time"], [0, 3, 1, "", "geojson"], [0, 3, 1, "", "integer"], [0, 3, 1, "", "number"], [0, 3, 1, "", "object"], [0, 3, 1, "", "raster_cube"], [0, 3, 1, "", "spatial_extent"], [0, 3, 1, "", "string"], [0, 3, 1, "", "temporal_interval"], [0, 3, 1, "", "to_dict"]], "openeo.extra": [[14, 0, 0, "-", "spectral_indices"]], "openeo.extra.job_management": [[11, 1, 1, "", "CsvJobDatabase"], [11, 1, 1, "", "JobDatabaseInterface"], [11, 1, 1, "", "MultiBackendJobManager"], [11, 1, 1, "", "ParquetJobDatabase"], [11, 1, 1, "", "ProcessBasedJobCreator"]], "openeo.extra.job_management.JobDatabaseInterface": [[11, 3, 1, "", "count_by_status"], [11, 3, 1, "", "exists"], [11, 3, 1, "", "get_by_status"], [11, 3, 1, "", "persist"]], "openeo.extra.job_management.MultiBackendJobManager": [[11, 3, 1, "", "add_backend"], [11, 3, 1, "", "ensure_job_dir_exists"], [11, 3, 1, "", "get_error_log_path"], [11, 3, 1, "", "get_job_dir"], [11, 3, 1, "", "get_job_metadata_path"], [11, 3, 1, "", "on_job_cancel"], [11, 3, 1, "", "on_job_done"], [11, 3, 1, "", "on_job_error"], [11, 3, 1, "", "run_jobs"], [11, 3, 1, "", "start_job_thread"], [11, 3, 1, "", "stop_job_thread"]], "openeo.extra.job_management.ProcessBasedJobCreator": [[11, 3, 1, "", "__call__"], [11, 3, 1, "", "start_job"]], "openeo.extra.spectral_indices": [[14, 2, 1, "", "append_and_rescale_indices"], [14, 2, 1, "", "append_index"], [14, 2, 1, "", "append_indices"], [14, 2, 1, "", "compute_and_rescale_indices"], [14, 2, 1, "", "compute_index"], [14, 2, 1, "", "compute_indices"], [14, 2, 1, "", "list_indices"]], "openeo.internal": [[0, 0, 0, "-", "graph_building"]], "openeo.internal.graph_building": [[0, 1, 1, "", "FlatGraphableMixin"], [0, 1, 1, "", "PGNode"]], "openeo.internal.graph_building.FlatGraphableMixin": [[0, 3, 1, "", "print_json"], [0, 3, 1, "", 
"to_json"]], "openeo.internal.graph_building.PGNode": [[0, 3, 1, "", "flat_graph"], [0, 3, 1, "", "from_flat_graph"], [0, 3, 1, "", "to_dict"], [0, 3, 1, "", "to_process_graph_argument"], [0, 3, 1, "", "update_arguments"], [0, 3, 1, "", "walk_nodes"]], "openeo.metadata": [[0, 1, 1, "", "BandDimension"], [0, 1, 1, "", "CollectionMetadata"], [0, 1, 1, "", "SpatialDimension"], [0, 1, 1, "", "TemporalDimension"]], "openeo.metadata.BandDimension": [[0, 3, 1, "", "append_band"], [0, 3, 1, "", "band_index"], [0, 3, 1, "", "band_name"], [0, 3, 1, "", "filter_bands"], [0, 3, 1, "", "rename"], [0, 3, 1, "", "rename_labels"]], "openeo.metadata.SpatialDimension": [[0, 3, 1, "", "rename"]], "openeo.metadata.TemporalDimension": [[0, 3, 1, "", "rename"], [0, 3, 1, "", "rename_labels"]], "openeo.processes": [[2, 1, 1, "", "ProcessBuilder"], [2, 2, 1, "", "absolute"], [2, 2, 1, "", "add"], [2, 2, 1, "", "add_dimension"], [2, 2, 1, "", "aggregate_spatial"], [2, 2, 1, "", "aggregate_spatial_window"], [2, 2, 1, "", "aggregate_temporal"], [2, 2, 1, "", "aggregate_temporal_period"], [2, 2, 1, "", "all"], [2, 2, 1, "", "and_"], [2, 2, 1, "", "anomaly"], [2, 2, 1, "", "any"], [2, 2, 1, "", "apply"], [2, 2, 1, "", "apply_dimension"], [2, 2, 1, "", "apply_kernel"], [2, 2, 1, "", "apply_neighborhood"], [2, 2, 1, "", "apply_polygon"], [2, 2, 1, "", "arccos"], [2, 2, 1, "", "arcosh"], [2, 2, 1, "", "arcsin"], [2, 2, 1, "", "arctan"], [2, 2, 1, "", "arctan2"], [2, 2, 1, "", "ard_normalized_radar_backscatter"], [2, 2, 1, "", "ard_surface_reflectance"], [2, 2, 1, "", "array_append"], [2, 2, 1, "", "array_apply"], [2, 2, 1, "", "array_concat"], [2, 2, 1, "", "array_contains"], [2, 2, 1, "", "array_create"], [2, 2, 1, "", "array_create_labeled"], [2, 2, 1, "", "array_element"], [2, 2, 1, "", "array_filter"], [2, 2, 1, "", "array_find"], [2, 2, 1, "", "array_find_label"], [2, 2, 1, "", "array_interpolate_linear"], [2, 2, 1, "", "array_labels"], [2, 2, 1, "", "array_modify"], [2, 2, 1, "", "arsinh"], 
[2, 2, 1, "", "artanh"], [2, 2, 1, "", "atmospheric_correction"], [2, 2, 1, "", "between"], [2, 2, 1, "", "ceil"], [2, 2, 1, "", "climatological_normal"], [2, 2, 1, "", "clip"], [2, 2, 1, "", "cloud_detection"], [2, 2, 1, "", "constant"], [2, 2, 1, "", "cos"], [2, 2, 1, "", "cosh"], [2, 2, 1, "", "count"], [2, 2, 1, "", "create_data_cube"], [2, 2, 1, "", "cummax"], [2, 2, 1, "", "cummin"], [2, 2, 1, "", "cumproduct"], [2, 2, 1, "", "cumsum"], [2, 2, 1, "", "date_between"], [2, 2, 1, "", "date_difference"], [2, 2, 1, "", "date_shift"], [2, 2, 1, "", "dimension_labels"], [2, 2, 1, "", "divide"], [2, 2, 1, "", "drop_dimension"], [2, 2, 1, "", "e"], [2, 2, 1, "", "eq"], [2, 2, 1, "", "exp"], [2, 2, 1, "", "extrema"], [2, 2, 1, "", "filter_bands"], [2, 2, 1, "", "filter_bbox"], [2, 2, 1, "", "filter_labels"], [2, 2, 1, "", "filter_spatial"], [2, 2, 1, "", "filter_temporal"], [2, 2, 1, "", "filter_vector"], [2, 2, 1, "", "first"], [2, 2, 1, "", "fit_curve"], [2, 2, 1, "", "flatten_dimensions"], [2, 2, 1, "", "floor"], [2, 2, 1, "", "gt"], [2, 2, 1, "", "gte"], [2, 2, 1, "", "if_"], [2, 2, 1, "", "inspect"], [2, 2, 1, "", "int"], [2, 2, 1, "", "is_infinite"], [2, 2, 1, "", "is_nan"], [2, 2, 1, "", "is_nodata"], [2, 2, 1, "", "is_valid"], [2, 2, 1, "", "last"], [2, 2, 1, "", "linear_scale_range"], [2, 2, 1, "", "ln"], [2, 2, 1, "", "load_collection"], [2, 2, 1, "", "load_geojson"], [2, 2, 1, "", "load_ml_model"], [2, 2, 1, "", "load_result"], [2, 2, 1, "", "load_stac"], [2, 2, 1, "", "load_uploaded_files"], [2, 2, 1, "", "load_url"], [2, 2, 1, "", "log"], [2, 2, 1, "", "lt"], [2, 2, 1, "", "lte"], [2, 2, 1, "", "mask"], [2, 2, 1, "", "mask_polygon"], [2, 2, 1, "", "max"], [2, 2, 1, "", "mean"], [2, 2, 1, "", "median"], [2, 2, 1, "", "merge_cubes"], [2, 2, 1, "", "min"], [2, 2, 1, "", "mod"], [2, 2, 1, "", "multiply"], [2, 2, 1, "", "nan"], [2, 2, 1, "", "ndvi"], [2, 2, 1, "", "neq"], [2, 2, 1, "", "normalized_difference"], [2, 2, 1, "", "not_"], [2, 2, 1, "", "or_"], [2, 
2, 1, "", "order"], [2, 2, 1, "", "pi"], [2, 2, 1, "", "power"], [2, 2, 1, "", "predict_curve"], [2, 2, 1, "", "predict_random_forest"], [0, 2, 1, "", "process"], [2, 2, 1, "", "product"], [2, 2, 1, "", "quantiles"], [2, 2, 1, "", "rearrange"], [2, 2, 1, "", "reduce_dimension"], [2, 2, 1, "", "reduce_spatial"], [2, 2, 1, "", "rename_dimension"], [2, 2, 1, "", "rename_labels"], [2, 2, 1, "", "resample_cube_spatial"], [2, 2, 1, "", "resample_cube_temporal"], [2, 2, 1, "", "resample_spatial"], [2, 2, 1, "", "round"], [2, 2, 1, "", "run_udf"], [2, 2, 1, "", "run_udf_externally"], [2, 2, 1, "", "sar_backscatter"], [2, 2, 1, "", "save_result"], [2, 2, 1, "", "sd"], [2, 2, 1, "", "sgn"], [2, 2, 1, "", "sin"], [2, 2, 1, "", "sinh"], [2, 2, 1, "", "sort"], [2, 2, 1, "", "sqrt"], [2, 2, 1, "", "subtract"], [2, 2, 1, "", "sum"], [2, 2, 1, "", "tan"], [2, 2, 1, "", "tanh"], [2, 2, 1, "", "text_begins"], [2, 2, 1, "", "text_concat"], [2, 2, 1, "", "text_contains"], [2, 2, 1, "", "text_ends"], [2, 2, 1, "", "trim_cube"], [2, 2, 1, "", "unflatten_dimension"], [2, 2, 1, "", "variance"], [2, 2, 1, "", "vector_buffer"], [2, 2, 1, "", "vector_reproject"], [2, 2, 1, "", "vector_to_random_points"], [2, 2, 1, "", "vector_to_regular_points"], [2, 2, 1, "", "xor"]], "openeo.rest": [[0, 0, 0, "-", "_datacube"], [0, 0, 0, "-", "connection"], [0, 0, 0, "-", "conversions"], [0, 0, 0, "-", "datacube"], [0, 0, 0, "-", "graph_building"], [0, 0, 0, "-", "job"], [0, 0, 0, "-", "mlmodel"], [0, 0, 0, "-", "multiresult"], [0, 0, 0, "-", "udp"], [0, 0, 0, "-", "userfile"], [0, 0, 0, "-", "vectorcube"]], "openeo.rest._datacube": [[0, 1, 1, "", "UDF"]], "openeo.rest._datacube.UDF": [[0, 3, 1, "", "from_file"], [0, 3, 1, "", "from_url"], [0, 3, 1, "", "get_run_udf_callback"]], "openeo.rest.connection": [[0, 1, 1, "", "Connection"]], "openeo.rest.connection.Connection": [[0, 3, 1, "", "as_curl"], [0, 3, 1, "", "assert_user_defined_process_support"], [0, 3, 1, "", "authenticate_basic"], [0, 3, 1, "", 
"authenticate_oidc"], [0, 3, 1, "", "authenticate_oidc_access_token"], [0, 3, 1, "", "authenticate_oidc_authorization_code"], [0, 3, 1, "", "authenticate_oidc_client_credentials"], [0, 3, 1, "", "authenticate_oidc_device"], [0, 3, 1, "", "authenticate_oidc_refresh_token"], [0, 3, 1, "", "authenticate_oidc_resource_owner_password_credentials"], [0, 3, 1, "", "capabilities"], [0, 3, 1, "", "collection_items"], [0, 3, 1, "", "create_job"], [0, 3, 1, "", "datacube_from_flat_graph"], [0, 3, 1, "", "datacube_from_json"], [0, 3, 1, "", "datacube_from_process"], [0, 3, 1, "", "describe_account"], [0, 3, 1, "", "describe_collection"], [0, 3, 1, "", "describe_process"], [0, 3, 1, "", "download"], [0, 3, 1, "", "execute"], [0, 3, 1, "", "get_file"], [0, 3, 1, "", "imagecollection"], [0, 3, 1, "", "job"], [0, 3, 1, "", "job_logs"], [0, 3, 1, "", "job_results"], [0, 3, 1, "", "list_collection_ids"], [0, 3, 1, "", "list_collections"], [0, 3, 1, "", "list_file_formats"], [0, 3, 1, "", "list_file_types"], [0, 3, 1, "", "list_files"], [0, 3, 1, "", "list_jobs"], [0, 3, 1, "", "list_processes"], [0, 3, 1, "", "list_service_types"], [0, 3, 1, "", "list_services"], [0, 3, 1, "", "list_udf_runtimes"], [0, 3, 1, "", "list_user_defined_processes"], [0, 3, 1, "", "load_collection"], [0, 3, 1, "", "load_disk_collection"], [0, 3, 1, "", "load_geojson"], [0, 3, 1, "", "load_ml_model"], [0, 3, 1, "", "load_result"], [0, 3, 1, "", "load_stac"], [0, 3, 1, "", "load_stac_from_job"], [0, 3, 1, "", "load_url"], [0, 3, 1, "", "remove_service"], [0, 3, 1, "", "request"], [0, 3, 1, "", "save_user_defined_process"], [0, 3, 1, "", "service"], [0, 3, 1, "", "upload_file"], [0, 3, 1, "", "user_defined_process"], [0, 3, 1, "", "user_jobs"], [0, 3, 1, "", "validate_process_graph"], [0, 3, 1, "", "vectorcube_from_paths"], [0, 3, 1, "", "version_discovery"], [0, 3, 1, "", "version_info"]], "openeo.rest.conversions": [[0, 4, 1, "", "InvalidTimeSeriesException"], [0, 2, 1, "", "datacube_from_file"], [0, 2, 1, 
"", "datacube_plot"], [0, 2, 1, "", "datacube_to_file"], [0, 2, 1, "", "timeseries_json_to_pandas"]], "openeo.rest.datacube": [[0, 1, 1, "", "DataCube"], [0, 5, 1, "", "THIS"]], "openeo.rest.datacube.DataCube": [[0, 3, 1, "", "__init__"], [0, 3, 1, "", "add"], [0, 3, 1, "", "add_dimension"], [0, 3, 1, "", "aggregate_spatial"], [0, 3, 1, "", "aggregate_spatial_window"], [0, 3, 1, "", "aggregate_temporal"], [0, 3, 1, "", "aggregate_temporal_period"], [0, 3, 1, "", "apply"], [0, 3, 1, "", "apply_dimension"], [0, 3, 1, "", "apply_kernel"], [0, 3, 1, "", "apply_neighborhood"], [0, 3, 1, "", "apply_polygon"], [0, 3, 1, "", "ard_normalized_radar_backscatter"], [0, 3, 1, "", "ard_surface_reflectance"], [0, 3, 1, "", "atmospheric_correction"], [0, 3, 1, "", "band"], [0, 3, 1, "", "band_filter"], [0, 3, 1, "", "chunk_polygon"], [0, 3, 1, "", "count_time"], [0, 3, 1, "", "create_collection"], [0, 3, 1, "", "create_job"], [0, 3, 1, "", "dimension_labels"], [0, 3, 1, "", "divide"], [0, 3, 1, "", "download"], [0, 3, 1, "", "drop_dimension"], [0, 3, 1, "", "execute"], [0, 3, 1, "", "execute_batch"], [0, 3, 1, "", "execute_local_udf"], [0, 3, 1, "", "filter_bands"], [0, 3, 1, "", "filter_bbox"], [0, 3, 1, "", "filter_labels"], [0, 3, 1, "", "filter_spatial"], [0, 3, 1, "", "filter_temporal"], [0, 3, 1, "", "fit_curve"], [0, 3, 1, "", "flat_graph"], [0, 3, 1, "", "flatten_dimensions"], [0, 3, 1, "", "graph_add_node"], [0, 3, 1, "", "linear_scale_range"], [0, 3, 1, "", "ln"], [0, 3, 1, "", "load_collection"], [0, 3, 1, "", "load_disk_collection"], [0, 3, 1, "", "load_stac"], [0, 3, 1, "", "log10"], [0, 3, 1, "", "log2"], [0, 3, 1, "", "logarithm"], [0, 3, 1, "", "logical_and"], [0, 3, 1, "", "logical_or"], [0, 3, 1, "", "mask"], [0, 3, 1, "", "mask_polygon"], [0, 3, 1, "", "max_time"], [0, 3, 1, "", "mean_time"], [0, 3, 1, "", "median_time"], [0, 3, 1, "", "merge"], [0, 3, 1, "", "merge_cubes"], [0, 3, 1, "", "min_time"], [0, 3, 1, "", "multiply"], [0, 3, 1, "", "ndvi"], [0, 3, 1, 
"", "normalized_difference"], [0, 3, 1, "", "polygonal_histogram_timeseries"], [0, 3, 1, "", "polygonal_mean_timeseries"], [0, 3, 1, "", "polygonal_median_timeseries"], [0, 3, 1, "", "polygonal_standarddeviation_timeseries"], [0, 3, 1, "", "power"], [0, 3, 1, "", "predict_curve"], [0, 3, 1, "", "predict_random_forest"], [0, 3, 1, "", "preview"], [0, 3, 1, "", "print_json"], [0, 3, 1, "", "process"], [0, 3, 1, "", "process_with_node"], [0, 3, 1, "", "raster_to_vector"], [0, 3, 1, "", "reduce_bands"], [0, 3, 1, "", "reduce_bands_udf"], [0, 3, 1, "", "reduce_dimension"], [0, 3, 1, "", "reduce_spatial"], [0, 3, 1, "", "reduce_temporal"], [0, 3, 1, "", "reduce_temporal_simple"], [0, 3, 1, "", "reduce_temporal_udf"], [0, 3, 1, "", "reduce_tiles_over_time"], [0, 3, 1, "", "rename_dimension"], [0, 3, 1, "", "rename_labels"], [0, 3, 1, "", "resample_cube_spatial"], [0, 3, 1, "", "resample_cube_temporal"], [0, 3, 1, "", "resample_spatial"], [0, 3, 1, "", "resolution_merge"], [0, 3, 1, "", "result_node"], [0, 3, 1, "", "sar_backscatter"], [0, 3, 1, "", "save_result"], [0, 3, 1, "", "save_user_defined_process"], [0, 3, 1, "", "send_job"], [0, 3, 1, "", "subtract"], [0, 3, 1, "", "to_json"], [0, 3, 1, "", "unflatten_dimension"], [0, 3, 1, "", "validate"]], "openeo.rest.graph_building": [[0, 1, 1, "", "CollectionProperty"], [0, 2, 1, "", "collection_property"]], "openeo.rest.job": [[0, 1, 1, "", "BatchJob"], [0, 1, 1, "", "JobResults"], [0, 1, 1, "", "RESTJob"], [0, 1, 1, "", "ResultAsset"]], "openeo.rest.job.BatchJob": [[0, 3, 1, "", "delete"], [0, 3, 1, "", "delete_job"], [0, 3, 1, "", "describe"], [0, 3, 1, "", "describe_job"], [0, 3, 1, "", "download_result"], [0, 3, 1, "", "download_results"], [0, 3, 1, "", "estimate"], [0, 3, 1, "", "estimate_job"], [0, 3, 1, "", "get_result"], [0, 3, 1, "", "get_results"], [0, 3, 1, "", "get_results_metadata_url"], [0, 6, 1, "", "job_id"], [0, 3, 1, "", "list_results"], [0, 3, 1, "", "logs"], [0, 3, 1, "", "run_synchronous"], [0, 3, 1, 
"", "start"], [0, 3, 1, "", "start_and_wait"], [0, 3, 1, "", "start_job"], [0, 3, 1, "", "status"], [0, 3, 1, "", "stop"], [0, 3, 1, "", "stop_job"]], "openeo.rest.job.JobResults": [[0, 3, 1, "", "download_file"], [0, 3, 1, "", "download_files"], [0, 3, 1, "", "get_asset"], [0, 3, 1, "", "get_assets"], [0, 3, 1, "", "get_metadata"]], "openeo.rest.job.ResultAsset": [[0, 3, 1, "", "download"], [0, 6, 1, "", "href"], [0, 3, 1, "", "load_bytes"], [0, 3, 1, "", "load_json"], [0, 6, 1, "", "metadata"], [0, 6, 1, "", "name"]], "openeo.rest.mlmodel": [[0, 1, 1, "", "MlModel"]], "openeo.rest.mlmodel.MlModel": [[0, 3, 1, "", "create_job"], [0, 3, 1, "", "execute_batch"], [0, 3, 1, "", "flat_graph"], [0, 3, 1, "", "load_ml_model"], [0, 3, 1, "", "print_json"], [0, 3, 1, "", "result_node"], [0, 3, 1, "", "save_ml_model"], [0, 3, 1, "", "to_json"]], "openeo.rest.multiresult": [[0, 1, 1, "", "MultiResult"]], "openeo.rest.multiresult.MultiResult": [[0, 3, 1, "", "__init__"], [0, 3, 1, "", "print_json"], [0, 3, 1, "", "to_json"]], "openeo.rest.udp": [[0, 1, 1, "", "RESTUserDefinedProcess"], [0, 2, 1, "", "build_process_dict"]], "openeo.rest.udp.RESTUserDefinedProcess": [[0, 3, 1, "", "delete"], [0, 3, 1, "", "describe"], [0, 3, 1, "", "store"], [0, 3, 1, "", "update"]], "openeo.rest.userfile": [[0, 1, 1, "", "UserFile"]], "openeo.rest.userfile.UserFile": [[0, 3, 1, "", "delete"], [0, 3, 1, "", "download"], [0, 3, 1, "", "from_metadata"], [0, 3, 1, "", "to_dict"], [0, 3, 1, "", "upload"]], "openeo.rest.vectorcube": [[0, 1, 1, "", "VectorCube"]], "openeo.rest.vectorcube.VectorCube": [[0, 3, 1, "", "apply_dimension"], [0, 3, 1, "", "create_job"], [0, 3, 1, "", "download"], [0, 3, 1, "", "execute"], [0, 3, 1, "", "execute_batch"], [0, 3, 1, "", "filter_bands"], [0, 3, 1, "", "filter_bbox"], [0, 3, 1, "", "filter_labels"], [0, 3, 1, "", "filter_vector"], [0, 3, 1, "", "fit_class_random_forest"], [0, 3, 1, "", "fit_regr_random_forest"], [0, 3, 1, "", "flat_graph"], [0, 3, 1, "", 
"load_geojson"], [0, 3, 1, "", "load_url"], [0, 3, 1, "", "print_json"], [0, 3, 1, "", "process"], [0, 3, 1, "", "result_node"], [0, 3, 1, "", "run_udf"], [0, 3, 1, "", "save_result"], [0, 3, 1, "", "send_job"], [0, 3, 1, "", "to_json"], [0, 3, 1, "", "vector_to_raster"]], "openeo.testing": [[0, 1, 1, "", "TestDataLoader"], [0, 0, 0, "-", "results"]], "openeo.testing.TestDataLoader": [[0, 3, 1, "", "get_path"], [0, 3, 1, "", "load_json"]], "openeo.testing.results": [[0, 2, 1, "", "assert_job_results_allclose"], [0, 2, 1, "", "assert_xarray_allclose"], [0, 2, 1, "", "assert_xarray_dataarray_allclose"], [0, 2, 1, "", "assert_xarray_dataset_allclose"]], "openeo.udf": [[0, 0, 0, "-", "debug"], [0, 0, 0, "-", "run_code"], [0, 0, 0, "-", "structured_data"], [0, 0, 0, "-", "udf_data"], [25, 0, 0, "-", "udf_signatures"], [0, 0, 0, "-", "xarraydatacube"]], "openeo.udf.debug": [[0, 2, 1, "", "inspect"]], "openeo.udf.run_code": [[0, 2, 1, "", "execute_local_udf"], [0, 2, 1, "", "extract_udf_dependencies"]], "openeo.udf.structured_data": [[0, 1, 1, "", "StructuredData"]], "openeo.udf.udf_data": [[0, 1, 1, "", "UdfData"]], "openeo.udf.udf_data.UdfData": [[0, 7, 1, "", "datacube_list"], [0, 7, 1, "", "feature_collection_list"], [0, 3, 1, "", "from_dict"], [0, 3, 1, "", "get_datacube_list"], [0, 3, 1, "", "get_feature_collection_list"], [0, 3, 1, "", "get_structured_data_list"], [0, 3, 1, "", "set_datacube_list"], [0, 3, 1, "", "set_structured_data_list"], [0, 7, 1, "", "structured_data_list"], [0, 3, 1, "", "to_dict"], [0, 7, 1, "", "user_context"]], "openeo.udf.udf_signatures": [[25, 2, 1, "", "apply_datacube"], [25, 2, 1, "", "apply_metadata"], [25, 2, 1, "", "apply_timeseries"], [25, 2, 1, "", "apply_udf_data"], [25, 2, 1, "", "apply_vectorcube"]], "openeo.udf.xarraydatacube": [[0, 1, 1, "", "XarrayDataCube"]], "openeo.udf.xarraydatacube.XarrayDataCube": [[0, 7, 1, "", "array"], [0, 3, 1, "", "from_dict"], [0, 3, 1, "", "from_file"], [0, 3, 1, "", "get_array"], [0, 3, 1, "", 
"plot"], [0, 3, 1, "", "save_to_file"], [0, 3, 1, "", "to_dict"]], "openeo.util": [[0, 1, 1, "", "BBoxDict"], [0, 2, 1, "", "load_json_resource"], [0, 2, 1, "", "normalize_crs"], [0, 2, 1, "", "to_bbox_dict"]], "openeo.util.BBoxDict": [[0, 3, 1, "", "from_dict"], [0, 3, 1, "", "from_sequence"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "function", "Python function"], "3": ["py", "method", "Python method"], "4": ["py", "exception", "Python exception"], "5": ["py", "data", "Python data"], "6": ["py", "attribute", "Python attribute"], "7": ["py", "property", "Python property"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:function", "3": "py:method", "4": "py:exception", "5": "py:data", "6": "py:attribute", "7": "py:property"}, "terms": {"": [0, 1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 26], "0": [0, 2, 3, 4, 5, 8, 9, 11, 12, 14, 15, 16, 17, 18, 19, 20, 22, 24, 26], "00": [2, 4, 5, 12, 17, 25], "000": 17, "0001": [4, 25, 26], "001": 2, "002": 2, "004": 20, "005": 2, "00z": 2, "01": [0, 2, 4, 5, 6, 11, 12, 13, 17, 18, 20, 24, 25, 26], "010r7": 2, "02": [0, 2, 4, 5, 11, 12, 20, 24], "03": [5, 9, 11, 17, 20, 25, 26], "038": 12, "04": [4, 17, 20, 25], "04t14": 5, "05": [2, 3, 6, 12, 13, 17, 20, 24, 26], "06": [0, 4, 5, 12, 13, 17, 20, 24, 26], "069": 9, "06z": 5, "07": [0, 3, 5, 9, 17, 20, 23, 24, 26], "08": [0, 2, 17, 20, 23, 24], "08730b1b5458a4ed34edeee60ac79254": 12, "087806252": 9, "087f": 5, "08t08": 5, "09": [18, 20, 26], "096e": 12, "0_decad": 2, "0a1": 19, "0o600": 3, "0x7f6505a40d00": 24, "1": [0, 1, 2, 3, 4, 5, 9, 11, 12, 14, 16, 18, 19, 20, 22, 24, 25, 26], "10": [0, 2, 3, 4, 8, 9, 12, 18, 20, 22, 24, 25, 26], "100": [0, 2, 7, 18], "10000000": 0, "100x100km": 13, "1024": 12, "104": 7, "105": 18, "10m": 0, "10mb": 7, "11": [0, 2, 5, 12, 20], "11111111111111": 26, "112": 25, "113": 7, "11354": 12, "115": 7, "116": 17, "11t13": 3, "11z": 5, "12": [0, 2, 
4, 12, 17, 19, 20, 26], "123": [3, 7], "127": 19, "128": 25, "128x128": 25, "13": [0, 3, 20], "133": 7, "134": 7, "136": 7, "1386": 6, "138916": 6, "14": [0, 4, 11, 17, 20], "1414": 4, "1414b": 3, "1417": 4, "144": 7, "1443": 4, "1444": 4, "147": 7, "148e": 12, "15": [2, 4, 12, 17, 19, 20, 25, 26], "153": 7, "155": [4, 7], "155e": 12, "156": 4, "157": 7, "158": 7, "159": 7, "16": [0, 2, 4, 6, 20, 24, 25], "163": 4, "17": [0, 4, 17, 20], "170": 7, "175": 7, "176": 7, "1768": 4, "177": 12, "1772": 4, "178": 7, "1785": 4, "1787": 26, "179": 4, "1793": 26, "17t12": 17, "18": [0, 2, 3, 4, 17, 20], "182": 7, "184": 7, "1852": 26, "1855": 4, "1867": 26, "187": 7, "1873": 26, "1891": 4, "1892": 4, "19": [0, 4, 5, 17, 20], "190": 7, "191": 7, "192": 7, "197": 7, "198": 7, "1981": 2, "1e": 0, "2": [0, 2, 3, 4, 5, 9, 11, 12, 13, 17, 18, 20, 22, 24, 25, 26], "20": [0, 2, 4, 6, 20, 24], "200": 7, "2001": 0, "201": 7, "2010": 2, "2017": 9, "2018": 26, "2019": [0, 9, 12, 24], "202": 7, "2020": [2, 3, 4, 6, 13, 17, 18, 19, 20, 24, 26], "2021": [4, 5, 11, 16, 17, 20, 26], "2022": [3, 5, 12, 17, 20, 25], "2023": [12, 17, 20, 23], "2024": 20, "204": 7, "205": 7, "209": 7, "20m": 0, "20z": 3, "21": [0, 2, 20, 26], "210": 7, "21e": 12, "22": [0, 2, 20, 25], "2206": 9, "221": 7, "225": 7, "228": 7, "229": 7, "23": [0, 1, 2, 5, 17, 20, 24], "233": 7, "235": 0, "237": 7, "23z": 5, "24": [0, 20], "240": [0, 7], "242": 7, "2450": 26, "2453": 26, "2467": 26, "247": 7, "2491": 26, "2498": 26, "24t10": 5, "24t13": 3, "25": [0, 12, 20, 25], "250": 14, "255": 0, "256": 25, "259": 7, "26": [0, 14, 17, 20], "260": 7, "264": 7, "27": [0, 12, 20, 26], "274": 7, "275": 7, "276": 7, "278": 7, "279": 7, "28": [0, 2, 5, 20], "280": 7, "284": 7, "285": 7, "286": 7, "287": 7, "288": 7, "288079": 4, "29": [0, 2, 17, 20], "291": 7, "291835566": 9, "293": 7, "298": 7, "2b": 0, "2d": 0, "3": [0, 2, 5, 7, 9, 12, 15, 17, 18, 22, 24, 25, 26], "30": [0, 4, 12, 16, 20], "300": [0, 7], "300250": 4, "302": 7, "304": 
7, "308": 7, "309": 7, "31": [0, 2, 4, 11, 20, 25, 26], "310": [7, 16], "312": 7, "314": 7, "316": 7, "317": 7, "32": [0, 2, 11, 16, 20, 26], "320647": 6, "323": 7, "323e": 12, "324": 7, "326": 7, "32632": [0, 12], "327598": 4, "328": 7, "33": [0, 11, 20], "331": 7, "332": 7, "3339": [2, 17], "335": 7, "3359": 7, "336": 7, "3377": 7, "338": 7, "34": [17, 20], "3456": 7, "346": 7, "3485": 7, "3493": 7, "3496": 7, "35": [0, 5, 15, 18, 20], "350": 7, "3509": 7, "352": 7, "3544": 7, "3555": 7, "3578": 7, "3585": 7, "36": [0, 2, 5, 15, 20], "3609": 7, "361": 7, "3612": 7, "3617": 7, "3645": 7, "365": [2, 7], "366": 7, "3670": 7, "3687": 7, "3698": 7, "3700": 7, "373": 7, "3739": 7, "377": 7, "3846": 7, "386": 7, "387": 7, "3889": 7, "39": 17, "390": 7, "391": 7, "3927399": 9, "3jkd": 22, "4": [0, 4, 5, 6, 9, 17, 18, 20, 22, 25, 26], "40": [3, 12], "4008": 7, "401": 7, "4011": 7, "4012": 7, "403": 7, "404": 7, "40bc": 5, "410": 7, "412": 7, "413": 17, "414": 7, "418": 7, "419": 7, "41de": 5, "42": 0, "421": 7, "424": 7, "425": 7, "431": 7, "4326": [0, 17, 25], "432f3b3ef3a": 5, "433": 7, "436": 7, "441b": 5, "442": 7, "443": 7, "448": 7, "449": 7, "451": 7, "452": 7, "454": 7, "457": 7, "459": 7, "46": [5, 12], "460": 7, "463a": 5, "464": 7, "47": 12, "470": 7, "470f": 5, "471": 7, "48": 6, "484": 7, "485": 7, "491": 7, "493": 7, "499": 7, "4990200": 12, "4a54": 5, "4bbb3c72a9234ee998a6de940a148e346a": 25, "4c2e": 5, "4e720e70": 5, "5": [0, 2, 4, 5, 12, 15, 17, 18, 20, 22, 24, 25, 26], "50": [0, 5, 12], "501": 7, "502": 7, "508": [0, 7, 25], "50z": 3, "51": [0, 4, 5, 9, 17, 20, 22, 24, 25, 26], "511": 7, "512": [0, 7], "512x512": 13, "513": 7, "515": 7, "5161000": 0, "5181000": 0, "52": [0, 2, 12], "522": 7, "522499": 4, "524124": 6, "526": 7, "527": 7, "528": 7, "529591": 4, "52e": 12, "53": 12, "5300040": 12, "549": 7, "54ee": 5, "550": 7, "559ed2d4e53c": 5, "56": 17, "566": 7, "567": 7, "568": 7, "56z": 17, "571": 7, "573": 7, "578": 7, "57da31da": 5, "58": 5, "583": 
7, "588": 7, "59": 5, "590": 7, "59003": 9, "592": 7, "598": 7, "5a1": 12, "5d806224": 5, "5x3x6": 5, "6": [0, 4, 5, 12, 18, 20, 22, 25, 26], "60": [0, 11, 25], "600000": 12, "604": 7, "612": 7, "623": 7, "624": 7, "628": 7, "633011": 4, "635": 7, "638": 7, "64": 0, "641": 7, "645": 7, "652": 7, "652000": 0, "654": 7, "655": 7, "656": 7, "657": 7, "665": 12, "666": 7, "672000": 0, "676": 7, "677": 7, "680": 7, "681": 7, "683": 7, "68caccff": 5, "7": [0, 2, 4, 12, 20, 26], "70": 26, "705": 12, "723": [7, 25], "726": 4, "74ce": 5, "75": [0, 17, 26], "758216409030558": 9, "75e": 12, "7946": 0, "7fd4": 5, "8": [0, 2, 16, 18, 19, 20, 25, 26], "80": [12, 17], "8000": [14, 19], "8025": 12, "809760": 12, "84": 17, "843e": 12, "846b": 3, "86": 16, "881": 7, "88bd": 5, "8949": 9, "9": [0, 2, 4, 12, 14, 18, 19, 20], "90757893795b": 5, "91": 7, "92db": 5, "935": 12, "96": 7, "9_decad": 2, "9d7d": 5, "A": [0, 2, 3, 4, 5, 11, 13, 17, 20, 26], "AND": [0, 2], "As": [0, 1, 2, 4, 12, 16, 17, 18, 19, 25, 26], "At": [0, 2, 4, 8, 16], "Be": [0, 2, 16], "Being": 24, "By": [0, 2, 4, 11, 16, 17], "For": [0, 1, 2, 3, 4, 5, 9, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26], "If": [0, 2, 3, 4, 5, 7, 11, 12, 14, 15, 19, 21, 24, 25, 26], "In": [0, 2, 3, 4, 5, 11, 12, 14, 15, 17, 19, 24, 25, 26], "It": [0, 2, 3, 4, 5, 6, 12, 13, 14, 16, 17, 18, 19, 20, 21, 24, 25, 26], "Its": 0, "No": [2, 7, 18, 21], "Not": [0, 2, 6, 15, 25, 26], "OR": 2, "On": [1, 2, 3, 7, 9, 26], "One": [0, 2, 11, 17], "Or": [0, 3, 15, 17, 19, 25], "Such": 17, "That": 3, "The": [0, 1, 2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26], "Their": 25, "Then": [2, 19], "There": [0, 2, 3, 9, 16, 19, 25, 26], "These": [0, 2, 4, 9, 19, 24, 25], "To": [0, 1, 2, 3, 4, 9, 13, 14, 17, 18, 19, 21, 22, 24, 25, 26], "With": [0, 3, 5, 12, 25], "_": 11, "_____": 17, "________": 17, "____________": 17, "_________________________": 17, "__add__": 23, "__and__": 23, "__call__": 11, "__eq__": 23, "__file__": 0, "__ge__": 
23, "__getitem__": 23, "__gt__": 23, "__init__": 0, "__invert__": 23, "__le__": 23, "__lt__": 23, "__mul__": 23, "__ne__": 23, "__neg__": 23, "__or__": 23, "__pow__": 23, "__radd__": 23, "__rmul__": 23, "__rpow__": 23, "__rsub__": 23, "__rtruediv__": 23, "__sub__": 23, "__truediv__": 23, "__version__": 19, "_build": 19, "_builder": 0, "_datacub": 0, "_pg": 7, "_sourc": 0, "_start_job_default": 11, "_version": 19, "_without_": 25, "a1": 19, "a366985ebd67": 5, "aaaaaa": [0, 2], "aai": 4, "ab": 2, "abaa": 5, "abbrevi": [3, 25, 26], "abcdef": [0, 2], "abcdefgh": [0, 2], "abil": 25, "abl": [2, 3, 4, 25], "abort": 2, "about": [0, 2, 3, 4, 7, 17, 18, 19, 21, 24, 25, 26], "abov": [0, 2, 3, 4, 5, 11, 16, 17, 18, 19, 24, 25, 26], "absenc": 0, "absolut": [0, 2, 23, 24], "abstract": [0, 4, 7, 11, 15, 19, 25], "academ": 3, "accept": [0, 1, 2, 11, 25], "access": [0, 3, 7, 13, 15, 16], "access_token": 0, "accident": 3, "accomod": 25, "accord": [0, 2], "accordingli": [2, 7], "account": [0, 2, 4, 5, 20, 26], "accustom": 24, "achiev": 25, "acquisit": 2, "across": [7, 19, 25], "act": 3, "action": [5, 7, 19], "activ": [3, 19], "actual": [0, 1, 2, 4, 7, 14, 17, 25], "acycl": 0, "ad": [0, 2, 3, 6, 8, 11, 14, 15, 18, 19, 20, 22], "add": [0, 1, 2, 3, 6, 7, 11, 12, 15, 17, 18, 19, 23, 24], "add_backend": 11, "add_dimens": [0, 2, 7, 23], "addit": [0, 2, 3, 7, 8, 9, 11, 14, 17, 18, 19, 24, 25, 26], "addition": [12, 14], "address": [2, 3, 4, 7, 19], "adher": 7, "adjust": [7, 25], "adopt": [0, 25], "advanc": [0, 7, 17, 25], "advantag": 25, "advertis": [0, 5, 7, 17], "aerosol": 2, "afe6": 5, "affect": 9, "after": [0, 2, 3, 4, 5, 6, 7, 11, 19, 25], "afterward": [0, 2, 11], "again": [2, 3, 4, 17, 19, 26], "against": [0, 2, 7], "aggreg": [0, 2, 17, 20, 24, 26], "aggregate_spati": [0, 2, 4, 7, 15, 17, 22, 23, 24, 26], "aggregate_spatial_window": [0, 2, 7, 23], "aggregate_tempor": [0, 2, 7, 23, 24], "aggregate_temporal_period": [0, 2, 7, 23], "ai": 25, "aid": 25, "aim": [0, 25], "ak": 18, 
"algorithm": [0, 2, 3, 4, 9, 25, 26], "alia": [0, 2, 5, 7], "alias": 7, "align": [0, 2, 7], "all": [0, 2, 3, 4, 6, 7, 11, 13, 17, 18, 19, 22, 23, 24, 25, 26], "allow": [0, 2, 3, 4, 5, 7, 8, 11, 12, 15, 16, 17, 18, 19, 24, 25, 26], "allow_common": 0, "alon": 6, "along": [0, 1, 2, 4, 5, 24, 25], "alpha": 19, "alpha1": 25, "alreadi": [0, 2, 3, 5, 6, 7, 11, 17, 19, 26], "also": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26], "altern": [4, 7], "although": [7, 17], "alwai": [0, 2, 3, 7, 8, 17, 25], "ambigu": 7, "among": [0, 16], "amongst": 11, "amount": [0, 17], "an": [0, 1, 2, 3, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 24, 26], "analysi": [4, 7, 10, 17, 20], "and_": [2, 23], "angl": [0, 2, 9], "ani": [0, 2, 3, 4, 5, 9, 11, 12, 17, 19, 23, 25], "anno": [0, 2], "annoi": [6, 24], "annot": 25, "anomali": [2, 23], "anonym": [1, 2, 3, 24], "anoth": [0, 2, 3, 5, 16, 17, 19, 22, 24, 25, 26], "anymor": [0, 7, 25], "anyth": [11, 25], "anywai": 7, "apart": [0, 6, 11, 17], "apertur": 9, "api": [3, 4, 5, 7, 9, 10, 12, 16, 17, 18, 19, 20, 22, 26], "appear": 2, "append": [0, 2, 14, 19, 24, 25], "append_and_rescale_indic": [10, 14], "append_band": 0, "append_index": [10, 14], "append_indic": [10, 14], "appli": [0, 2, 7, 9, 13, 17, 18, 20, 23, 24], "applic": [0, 2, 5, 14, 20, 21, 22, 26], "apply_datacub": [0, 7, 25], "apply_dimens": [0, 2, 7, 20, 23, 24], "apply_kernel": [0, 2, 18, 23, 24], "apply_metadata": 25, "apply_neighborhood": [0, 2, 7, 20, 23, 24], "apply_polygon": [0, 2, 7], "apply_timeseri": 25, "apply_udf_data": 25, "apply_vectorcub": [7, 25], "appreci": 19, "approach": [0, 3, 16, 17, 19, 24, 25], "appropri": [0, 17, 24], "april": [0, 2, 17], "ar": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26], "arbitrari": [0, 2], "arcco": [2, 23, 24], "architectur": 3, "archiv": [17, 19, 25], "arcosh": [2, 23, 24], "arcsin": [2, 23], "arctan": [2, 23], "arctan2": [2, 23, 24], "ard": 9, 
"ard_normalized_radar_backscatt": [0, 2, 9, 23], "ard_surface_reflect": [0, 2, 7, 9, 23], "area": [0, 2, 13, 17], "arg": [0, 11], "argument": [0, 1, 2, 3, 4, 5, 6, 7, 11, 14, 15, 16, 17, 18, 25, 26], "arithmet": 2, "around": [0, 7, 13], "arrai": [0, 1, 2, 7, 12, 18, 24, 25, 26], "array1": 2, "array2": 2, "array_append": [2, 23], "array_appli": [2, 23], "array_concat": [2, 7, 23], "array_contain": [2, 23], "array_cr": [2, 4, 7, 23], "array_create_label": [2, 23], "array_el": [2, 7, 23], "array_filt": [2, 23], "array_find": [2, 23], "array_find_label": [2, 23], "array_interpolate_linear": [2, 23], "array_label": [2, 23], "array_modifi": [2, 7, 23], "arrayelementnotavail": 2, "arraynotlabel": 2, "arsinh": [2, 23], "artanh": [2, 23, 24], "artefact": 4, "artifact": 19, "artifactori": [13, 19], "as_curl": [0, 7], "asc": 2, "ascend": 2, "ascendingprobabilitiesrequir": 2, "asctim": 11, "ashap": 0, "ask": 3, "aspect": [4, 24, 25, 26], "assembl": [7, 24], "assert": [0, 21, 25], "assert_job_results_allclos": 0, "assert_user_defined_process_support": 0, "assert_xarray_allclos": 0, "assert_xarray_dataarray_allclos": 0, "assert_xarray_dataset_allclos": 0, "assertionerror": 0, "assess": 2, "asset": [0, 2, 7, 9, 13, 22], "assign": [0, 2], "associ": [0, 3, 19], "assum": [0, 2, 3, 13, 15, 18, 19, 22, 24, 25], "assur": 4, "asynchron": [20, 26], "atmoshper": 9, "atmospher": [0, 2, 10], "atmospheric_correct": [0, 2, 7, 9, 23], "atmospheric_correction_method": [0, 2], "atmospheric_correction_opt": [0, 2, 7], "atol": 0, "attach": [14, 19], "attempt": [0, 3, 7], "attribut": [7, 12], "audienc": [6, 21, 22], "august": [0, 2], "auth": [0, 4, 7, 8, 20], "auth_config": 0, "auth_connect": 13, "auth_opt": 0, "auth_typ": 0, "authbas": 0, "authconfig": [0, 7], "authent": [0, 2, 7, 8, 20], "authenticate_bas": [0, 3], "authenticate_oidc": [0, 3, 4, 7, 20, 25, 26], "authenticate_oidc_access_token": [0, 7], "authenticate_oidc_authorization_cod": [0, 3], "authenticate_oidc_client_credenti": [0, 3, 7], 
"authenticate_oidc_devic": [0, 3], "authenticate_oidc_refresh_token": [0, 3], "authenticate_oidc_resource_owner_password_credenti": [0, 3], "author": [0, 3, 7], "auto": [0, 7, 8, 20, 25], "auto_add_save_result": [0, 7], "auto_authent": [3, 8], "auto_collaps": 0, "auto_decod": [0, 7], "auto_valid": 0, "autobuild": 19, "autodetect": 7, "autogener": [7, 23], "autom": 3, "automat": [0, 2, 3, 4, 7, 8, 10, 11, 15, 17, 18, 19, 24, 25], "autotick": 19, "auxiliari": 14, "avail": [0, 2, 3, 4, 5, 7, 9, 11, 13, 16, 17, 19, 21, 24, 25], "averag": [2, 17], "avg": 24, "avoid": [0, 2, 3, 6, 7, 13, 15, 17, 19, 21, 22, 25], "aw": 12, "awai": 15, "awar": [0, 2, 17], "awesom": [7, 14], "ax": [0, 2], "axi": [0, 2, 25], "azimuth": 2, "b": [2, 4, 14], "b02": [4, 9, 11, 12, 17, 22, 25, 26], "b03": [9, 11, 12, 17, 22, 25], "b04": [4, 9, 12, 13, 17, 22, 25, 26], "b06d": 5, "b08": [4, 12, 26], "b09": 9, "b0e8adcf": 5, "b1": [0, 19], "b11": 9, "b2": 0, "b3": 0, "b3c0ea88ff38": 5, "b3dw": 22, "b4": 0, "b76a": 5, "b7bfd3b59669": 5, "b8a": 9, "back": [0, 1, 2, 5, 7, 8, 12, 16, 17, 18, 20, 24, 25, 26], "backend": [0, 3, 4, 7, 10, 12, 13, 15, 17, 18, 20, 25], "background": [0, 3, 10, 20], "backscatt": [0, 2, 10], "backward": 0, "bad": [3, 7], "band": [0, 2, 7, 9, 10, 11, 12, 13, 17, 20, 22, 24, 25, 26], "band_filt": 0, "band_index": 0, "band_math_mod": 0, "band_nam": 0, "banddimens": 0, "bar": [6, 7, 11], "bare": 4, "base": [0, 2, 4, 5, 7, 9, 10, 17, 19, 20, 21, 25, 26], "basegeometri": 0, "basemap": 7, "bash": [3, 19], "basi": 3, "basic": [0, 2, 5, 7, 8, 9, 10, 13, 16, 17, 20, 25, 26], "basicconfig": 11, "batch": [0, 1, 2, 7, 11, 13, 18, 20, 22, 25], "batchjob": [0, 5, 7, 11, 22, 25], "bbox": [0, 5, 7, 22, 24, 26], "bbox_filt": 7, "bbox_param": 0, "bboxdict": 0, "bc13": 5, "bdist_wheel": 19, "be04": 5, "bear": 4, "bearer": 0, "becaus": [0, 1, 2, 3, 4, 6, 9, 11, 13, 19, 24, 25, 26], "becom": 3, "been": [0, 2, 7, 9, 11, 14, 15, 25], "befor": [0, 2, 3, 4, 5, 7, 17, 25, 26], "begin": 2, "behavior": 
[0, 2, 6, 7], "behaviour": [0, 11], "behind": [4, 24], "being": [0, 2, 12, 24, 25], "below": [2, 3, 11, 19, 23, 24, 25], "benchmark": 0, "best": [0, 4, 7, 17, 20], "beta": [16, 19], "beta0": [0, 2], "better": [0, 2, 3, 4, 6, 7, 15], "between": [0, 2, 3, 7, 9, 11, 17, 18, 23, 25], "beyond": 0, "bff6f9b14b8d": 5, "bilinear": 2, "bill": [0, 7], "bin": 19, "binari": [0, 4, 19], "bit": [0, 4, 5, 17, 20, 22, 25], "black": 6, "blindli": [7, 25], "block": [0, 2, 4, 11, 19, 24, 25, 26], "blue": [0, 4, 14, 18, 26], "blur": [0, 2], "bmud": 4, "bodi": 0, "bool": [0, 11, 14], "boolean": [0, 2, 15, 26], "bootstrap": 3, "border": [0, 2, 17, 25], "bot": 19, "both": [0, 2, 9, 11, 19, 25], "bottom": [0, 2], "bound": [0, 2, 12, 17, 24, 26], "boundari": [0, 2], "bounding_box": [0, 7, 26], "box": [0, 2, 8, 11, 17, 24, 25, 26], "bp": 0, "branch": 19, "brdf": 9, "break": [7, 16], "breiman": 0, "briefli": 26, "bright": [0, 2], "broad": 21, "broken": 7, "browser": [3, 4, 19], "budget": [0, 7], "buffer": 2, "bug": [2, 6, 7, 19, 26], "build": [1, 2, 4, 6, 7, 11, 16, 20, 22, 24, 25], "build_process_dict": [0, 7, 26], "builder": 0, "built": [0, 3, 19], "builtin": [7, 24, 25], "bump": [7, 19], "bunch": 6, "burn": 19, "button": [5, 19], "byte": 0, "c": [19, 21, 26], "c9c51646b6a4": 5, "cach": [3, 7, 8], "calcul": [0, 2, 4, 14, 26], "calendar": [0, 2], "calibr": [9, 13], "call": [0, 2, 3, 4, 5, 6, 7, 11, 12, 13, 16, 17, 18, 21, 22, 25, 26], "callabl": [0, 1, 2, 7, 11], "callback": [0, 1, 2, 4, 7, 11, 20], "can": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26], "cancel": [0, 5, 7, 11], "cancel_running_job_aft": [7, 11], "candid": 2, "canon": [0, 2, 16], "canopi": [0, 4], "capabl": [0, 4, 7], "captur": 0, "card4l": [0, 2, 9], "cardin": 0, "care": [0, 2, 3], "carefulli": [17, 21], "case": [0, 2, 3, 5, 7, 9, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 24, 25, 26], "case_sensit": 2, "catalog": [0, 2, 12, 17], "categori": [0, 2, 7], "caus": [7, 17], "caveat": [3, 17], 
"cbartext": 0, "cd": 19, "cdefgh": [0, 2], "cdse": [7, 19], "ceil": [2, 23], "cell": [2, 4, 5, 6], "celsiu": 26, "center": [0, 2], "center_wavelength": 12, "central": [3, 4, 24, 25], "centroid": 2, "ceo": 9, "certain": [0, 6, 9, 13, 17, 26], "certainli": [3, 9], "cf": [12, 19], "chain": [0, 24, 25, 26], "challeng": [3, 25], "chang": [0, 2, 4, 8, 11, 12, 14, 15, 16, 19, 20, 21, 22], "changelog": [19, 20], "channel": [0, 7, 19], "chapter": 4, "charact": 25, "charg": 0, "cheap": 13, "check": [2, 4, 5, 12, 21, 25, 26], "check_error": 0, "checkout": [19, 21], "child": [0, 1, 2, 4, 7, 20, 25], "chmod": 7, "choic": [0, 3, 6, 25], "choos": [0, 2, 17, 25], "chosen": [0, 2, 9, 25], "chunk": [0, 25], "chunk_polygon": [0, 7], "chunk_siz": [0, 7], "chunksiz": 12, "chunktyp": 12, "ci": 7, "circumv": [2, 17], "cite": 25, "cl": 0, "cl13n7s3cr3t": 3, "clariti": 7, "class": [0, 1, 4, 5, 6, 7, 11, 18, 20, 22, 24, 26], "classif": [0, 2, 4, 7, 20], "classifi": 22, "classmethod": 0, "clean": 19, "clear": [2, 7, 17, 25], "clearli": [3, 7], "cli": 3, "client": [0, 1, 2, 4, 5, 6, 7, 8, 10, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26], "client_credenti": [0, 3], "client_id": [0, 3], "client_secret": [0, 3], "client_vers": [6, 21], "clientjob": 0, "clientsid": 15, "climat": 2, "climatologi": 2, "climatological_norm": [2, 23], "climatology_period": 2, "clip": [0, 2, 23], "clone": [7, 12, 19], "close": [0, 2, 4, 6, 7, 21, 26], "closest": [0, 2], "cloud": [0, 2, 3, 5, 7, 8, 12, 17, 18, 20], "cloud_cov": [0, 7, 12, 17], "cloud_detect": [2, 23], "cloud_detection_method": [0, 2], "cloud_detection_opt": [0, 2, 7], "cmap": [0, 12], "cmdoption": 2, "co": [1, 2, 23, 24], "coars": 0, "code": [0, 2, 4, 7, 12, 13, 20, 21, 24, 25], "coeffici": [0, 2, 7], "coher": 25, "col_1": 0, "col_2": 0, "collect": [0, 2, 3, 6, 7, 9, 10, 11, 14, 18, 20, 22, 25, 26], "collection_id": [0, 7], "collection_item": [0, 7], "collection_properti": [0, 7, 17], "collectionmetadata": [0, 7, 25], "collectionproperti": 0, "collis": 
19, "color": 0, "colormap": 0, "colour": 0, "column": [0, 4, 7, 11, 22], "com": [0, 3, 9, 11, 12, 17, 18, 19, 21, 25], "combin": [0, 2, 7, 13, 17, 24, 25], "come": [3, 4, 5, 6, 17, 25], "comma": 25, "command": [0, 3, 4, 7, 19, 25], "comment": [0, 8, 25], "commit": 3, "common": [0, 2, 3, 4, 6, 9, 11, 14, 17, 20, 25, 26], "common_nam": [0, 2, 12], "commonli": [2, 24], "commonmark": 0, "commun": [3, 6, 21, 22], "compact": [4, 17], "compactli": [1, 2, 24, 26], "compar": [0, 2, 7, 9, 25], "comparablevers": 7, "comparison": [2, 4, 7], "compat": [0, 25, 26], "compil": [2, 19], "complet": [2, 3, 7, 17, 24], "complex": [0, 3, 4, 14, 21, 24, 25, 26], "compliant": [0, 2, 7, 9], "compon": [2, 4, 7], "compos": 12, "composit": 4, "comput": [0, 2, 7, 9, 12, 13, 14, 17, 20], "computation": 9, "compute_and_rescale_indic": [10, 14], "compute_index": [10, 14], "compute_indic": [10, 14], "computed_band_1": 25, "computed_band_2": 25, "concat": 0, "concaten": [0, 2], "concept": [3, 4, 17, 22], "conceptu": [1, 2], "concis": 0, "concret": [0, 1, 2, 19, 25], "conda": [7, 19], "condit": [0, 2, 7], "config": [7, 8, 19, 20], "configur": [0, 2, 3, 7, 19, 20], "conflict": [0, 2, 6, 7, 21, 22], "conftest": 0, "confus": [7, 19, 22, 25, 26], "confusingli": 22, "congrat": 4, "conn": 0, "connect": [5, 6, 7, 8, 9, 11, 12, 14, 15, 16, 17, 18, 20, 22, 23, 24, 25, 26], "connection_provid": 11, "connection_retry_interv": 0, "connector": 25, "consecut": 2, "consequ": [0, 25], "consid": [0, 2, 3, 6, 19, 21], "consider": 25, "consist": [0, 2, 6, 7, 12, 17, 26], "constant": [0, 2, 7, 23, 24], "constraint": [4, 13, 20], "construct": [0, 2, 4, 7, 16, 17, 20, 24, 25, 26], "constructor": [0, 26], "consult": [3, 11, 16, 26], "contain": [0, 2, 3, 4, 7, 8, 9, 11, 12, 14, 16, 17, 18, 19, 24, 25, 26], "content": [0, 2, 7, 12, 25], "context": [0, 2, 7, 8, 11, 20, 25], "continent": 13, "continu": 0, "contour": 0, "contrast": 2, "contribut": [0, 2, 20, 21], "contributing_area": [0, 2], "control": [0, 2, 3, 5, 6, 7, 8, 
24, 25], "conveni": [0, 4, 7, 11, 13, 15, 25], "convent": [0, 6, 12, 14, 17, 19], "convers": [4, 7, 20, 24, 26], "convert": [0, 2, 4, 15, 24, 26], "convolut": [0, 2, 24], "cookbook": [7, 20], "coord": 25, "coord_i": 25, "coord_x": 25, "coordin": [0, 2, 4, 5, 7, 12, 17, 22, 25, 26], "copi": [0, 3, 25], "core": 24, "corner": [0, 2], "correct": [0, 2, 10, 25], "correctli": [7, 12, 24, 25], "correl": 25, "correspond": [0, 1, 2, 4, 5, 11, 18, 19, 22, 23, 24, 26], "corrupt": 5, "cosh": [2, 23, 24], "cosin": 2, "cost": [0, 4, 7, 9, 13, 17, 25], "could": [0, 2, 3, 5, 15, 16, 25, 26], "count": [0, 2, 4, 7, 11, 17, 23], "count_by_statu": 11, "count_tim": [0, 23], "counter": 7, "countmismatch": 2, "coupl": [0, 3, 4, 5, 6, 9, 17, 22, 24, 25, 26], "cours": 13, "cousin": 26, "cover": [0, 2, 4, 7, 11, 12, 13, 17, 18, 22, 24, 25, 26], "coverag": 12, "cpu": 0, "cr": [0, 2, 7, 12, 26], "creat": [0, 1, 2, 4, 7, 11, 14, 16, 18, 20, 24, 25, 26], "create_collect": 0, "create_data_cub": 2, "create_job": [0, 5, 7, 11, 13, 15, 18, 22], "create_job_db": [7, 11], "create_raster_cub": [7, 23], "create_servic": [0, 7], "creation": [5, 7, 10, 13], "creator": 11, "credenti": [0, 7, 20], "criteria": 17, "crop": 22, "cryptic": [0, 5, 7], "csv": [4, 7, 11], "csvjobdatabas": [7, 10, 11], "cube": [0, 1, 2, 5, 7, 11, 13, 14, 15, 16, 18, 20, 22], "cube1": [0, 2, 18], "cube2": [0, 2, 18], "cube_upd": 25, "cubearrai": 25, "cubemetadata": 7, "cubic": 2, "cubicsplin": 2, "culprit": 3, "cumbersom": 25, "cummax": [2, 23], "cummin": [2, 23], "cumproduct": [2, 23], "cumsum": [2, 23], "cumul": 2, "curl": [0, 7], "currenc": 7, "current": [0, 2, 3, 4, 5, 7, 8, 9, 11, 19, 24, 25], "curv": 2, "custom": [0, 3, 7, 8, 9, 11, 24, 25, 26], "cycl": 5, "d": 17, "d5b8b8f2": 5, "d7393fba": 3, "dai": [0, 2, 3, 17], "daili": [2, 3], "dask": [7, 12], "data": [0, 1, 2, 3, 5, 7, 10, 11, 12, 13, 14, 18, 20, 22, 25], "data_dict": 0, "data_list": 0, "data_paramet": 0, "data_typ": 12, "dataarrai": [0, 7, 12, 25], "databas": [3, 7, 
11], "datacub": [2, 4, 5, 7, 9, 10, 13, 14, 15, 17, 20, 22, 23, 24, 26], "datacube_from_fil": 0, "datacube_from_flat_graph": [0, 7], "datacube_from_json": [0, 7, 16, 18], "datacube_from_process": [0, 7, 16, 18, 26], "datacube_list": 0, "datacube_plot": 0, "datacube_to_fil": 0, "datacubeempti": 2, "datafram": [0, 4, 7, 11, 21], "dataset": [0, 2, 9, 10, 17, 20, 25], "date": [0, 2, 3, 4, 7, 19, 24, 25, 26], "date1": 2, "date2": 2, "date_between": 2, "date_differ": 2, "date_range_filt": 7, "date_shift": [2, 23], "date_tim": [0, 26], "datetim": [0, 7, 17, 25], "datetime64": 12, "david": 14, "davidfrantz": 9, "db": 24, "dd": [2, 17], "debug": [0, 5, 7, 25], "decad": [0, 2], "decemb": [0, 2], "decid": [0, 9], "decim": 2, "deciph": 4, "decis": 9, "declar": [0, 7], "decod": [0, 7], "decompress": 7, "decor": 7, "decoupl": 3, "decreas": 2, "dedic": [17, 25], "dedupl": 0, "deep": 0, "deeper": 17, "deepli": 0, "def": [0, 1, 2, 7, 11, 24, 25], "default": [0, 2, 5, 7, 8, 9, 11, 16, 17, 18, 20, 24, 25, 26], "default_backend": [3, 8], "default_timeout": [0, 7], "defin": [0, 2, 3, 4, 5, 7, 10, 11, 14, 17, 18, 19, 20, 22], "definit": [0, 7, 11, 24], "degre": [0, 2, 16, 26], "dekad": [0, 2], "delet": [0, 5, 7], "delete_job": 0, "delete_servic": [0, 7], "delimit": 25, "deliv": 2, "delta": 2, "dem": [0, 2], "demand": [7, 21], "demo": [6, 12], "dep": 19, "depend": [0, 1, 2, 3, 5, 7, 9, 11, 13, 15, 17, 19, 20], "deprec": [0, 2, 11, 15, 25, 26], "depth": [0, 3, 4, 24], "dereference_from_node_argu": 7, "deriv": 22, "descend": 2, "describ": [0, 2, 3, 5, 7, 11, 16, 19, 21, 24], "describe_account": [0, 3], "describe_collect": [0, 4, 7, 17], "describe_job": [0, 7], "describe_process": [0, 7, 24], "descript": [0, 2, 5, 7, 8, 12, 13, 16, 19, 24, 26], "design": [6, 17, 25], "desir": [0, 2, 3, 4, 5, 7, 11, 18, 24, 25, 26], "desktop": 6, "destin": 0, "detail": [0, 2, 3, 4, 5, 6, 7, 8, 15, 16, 17, 18, 21, 24, 25, 26], "detect": [0, 2, 3, 7, 14, 24, 25], "determin": [0, 2, 11, 13, 14, 17], "dev": [19, 
21], "develop": [0, 2, 6, 7, 20, 22, 25, 26], "deviat": [0, 2, 14], "devic": [0, 4, 7, 20], "df": [4, 7, 11], "dfn": 0, "dict": [0, 7, 11, 12, 14, 25], "dictionari": [0, 4, 7, 11, 14, 17, 18, 24, 25], "did": [0, 4, 7], "didn": 5, "differ": [0, 2, 3, 4, 8, 9, 14, 16, 17, 18, 19, 22, 25, 26], "digit": [0, 2, 4, 20, 25], "dilat": [2, 18], "dim": 25, "dimens": [0, 1, 2, 4, 7, 12, 22, 24, 25], "dimension": [0, 2, 25], "dimension_label": [0, 2, 7, 23], "dimensionalreadyexistsexcept": 7, "dimensionexist": [0, 2], "dimensionlabelcountmismatch": 0, "dimensionmismatch": 2, "dimensionnotavail": [0, 2], "dir": 0, "direct": [0, 5, 25], "directli": [0, 1, 2, 3, 4, 6, 7, 10, 11, 17, 18, 19, 20, 21, 22, 24, 25, 26], "directori": [0, 3, 5, 8, 11], "disabl": [0, 2, 7, 19], "disallow": 7, "disconnect": 4, "discov": [1, 2, 17, 25], "discoveri": [0, 2, 7, 20], "discuss": [0, 1, 2, 3, 4, 5, 8, 16, 19, 22, 24, 26], "disk": [0, 5], "displai": [0, 7], "dist": 19, "distanc": [2, 4], "distinct": [2, 7], "distribut": 25, "disturb": [0, 2], "distutil": 7, "div": 18, "dive": 17, "divid": [0, 2, 16, 18, 23, 24, 25, 26], "divide1": 26, "divide18": 26, "dividend": 2, "divis": 2, "divisor": 2, "djf": 2, "do": [0, 1, 2, 3, 4, 5, 7, 12, 13, 15, 17, 19, 22, 24, 25, 26], "doc": [0, 2, 5, 7, 9, 19], "docker": 19, "document": [0, 2, 3, 5, 6, 7, 17, 20, 21, 22, 24, 25, 26], "doe": [0, 2, 3, 4, 5, 7, 11, 17, 19, 24, 25], "doesn": [0, 2, 3, 5, 7], "domain": 7, "domini": [0, 2], "don": [0, 2, 3, 4, 7, 8, 14, 17, 21, 24, 25, 26], "done": [0, 3, 5, 11, 13, 17, 25], "doubl": [7, 15, 17, 21, 25], "down": [0, 2], "download": [0, 2, 7, 9, 11, 15, 17, 18, 19, 20, 22, 26], "download_fil": [0, 5, 7], "download_result": 0, "draft": [7, 26], "drastic": 17, "drawn": 0, "drive": 17, "driver": [4, 7, 25], "drop": [0, 2, 7, 19], "drop_dimens": [0, 2, 23], "dropbox": 17, "dropna": 4, "dtny": 3, "dtype": [7, 12], "due": [0, 2], "dump": [0, 3, 5, 15, 26], "duplic": 7, "durat": 0, "dure": [2, 3, 5, 7, 11, 19], "dynam": [0, 20, 
25], "e": [0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 17, 19, 21, 22, 23, 24, 25, 26], "e4df8648": 7, "each": [0, 1, 2, 3, 4, 5, 11, 12, 19, 21, 24, 25], "earli": 4, "earlier": [0, 3, 4, 7, 18], "earth": [0, 2, 3, 4, 12, 14, 20, 22], "eas": 3, "easi": [0, 8, 21, 26], "easier": [0, 1, 2, 3, 5, 7, 22, 24, 25], "easiest": [3, 5, 19, 24], "easili": [0, 3, 4, 5, 7, 19, 21, 24, 25], "east": [0, 4, 6, 7, 9, 12, 17, 20, 24, 25, 26], "ecosystem": [0, 21], "edit": [3, 8, 19], "editor": [1, 2, 5, 25, 26], "effect": [2, 25], "effici": 25, "effort": [0, 4, 7], "egi": 4, "either": [0, 2, 11], "element": [0, 2, 12, 25], "element84": 12, "elev": [0, 2], "elevation_model": [0, 2], "elimin": [4, 7, 22, 25], "ellipsoid": [0, 2], "ellipsoid_incidence_angl": [0, 2], "els": [2, 6, 18, 21, 24], "email": 3, "emb": [0, 17, 25], "embed": 0, "empti": [0, 2, 7, 11, 19], "en": 2, "enabl": [0, 2, 3, 7, 19, 26], "encapsul": [4, 16, 18, 25, 26], "enclos": 2, "encod": [0, 4, 7, 15], "encount": [0, 26], "end": [0, 1, 2, 5, 7, 8, 12, 16, 18, 19, 20, 22, 24, 25, 26], "end_dat": [0, 17, 24], "endpoint": [0, 2, 7, 25], "enforc": [5, 7, 19], "engin": [6, 7], "enhanc": 4, "enough": [0, 4, 14, 17, 19, 21], "ensur": [7, 25], "ensure_job_dir_exist": 11, "enter": [3, 4, 19], "entir": 0, "entiti": [0, 17, 18], "entri": [0, 2, 25], "entrypoint": 25, "enum": 0, "enumer": 2, "env": [7, 19], "environ": [0, 5, 7, 8, 19, 21, 25], "eo": [0, 1, 2, 4, 7, 12, 13, 16, 17, 18, 19, 21, 22, 25], "ep": 7, "epsg": [0, 2, 7, 12, 17], "eq": [2, 7, 23, 24], "equal": [0, 2, 7], "equival": [0, 2, 17, 24], "era": [0, 2], "eros": 2, "error": [0, 2, 5, 7, 11, 17, 21, 24, 25], "esa": 9, "especi": [2, 3], "essenti": [0, 7, 17], "establish": [3, 4], "estim": [0, 7, 25], "estimate_job": 0, "etc": [0, 2, 3, 4, 5, 7, 14, 15, 19, 25, 26], "etcetera": 0, "eu": [4, 9], "euler": 2, "evalu": [0, 1, 2, 7, 16, 20, 25], "even": [3, 4, 5, 6, 7, 11, 19, 25, 26], "event": [0, 7], "eventu": 0, "everi": [2, 9, 12, 25], "everyth": [0, 4, 18, 24], 
"everywher": 25, "evi": [18, 20], "evi_aggreg": [4, 26], "evi_composit": 4, "evi_cub": [4, 25], "evi_cube_mask": [4, 25], "evi_mask": 4, "evi_timeseri": 26, "exact": 25, "exactli": [0, 2, 11, 18, 25, 26], "exampl": [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 12, 13, 15, 16, 17, 19, 21, 22, 24], "example_aoi": 17, "exceed": 24, "except": [0, 2, 3, 5, 7, 9, 11, 25], "excess": 19, "exchang": 25, "exclud": [0, 2], "exclude_max": 2, "exclus": [0, 2], "execut": [0, 1, 2, 3, 7, 10, 12, 16, 18, 19, 20, 22, 26], "execute_batch": [0, 5, 7, 9, 25], "execute_local_udf": [0, 7, 25], "exist": [0, 2, 3, 4, 5, 7, 8, 11, 14, 19, 25], "exit": 3, "exp": [2, 23], "expand": [0, 2, 17], "expans": 24, "expect": [0, 2, 3, 12, 24, 25, 26], "expected_statu": 0, "expens": [9, 25], "experi": [4, 12, 19, 25], "experiment": [0, 2, 4, 7, 8, 11, 12, 14, 15, 16, 17, 22, 24, 25], "expir": [3, 7], "expiri": 3, "explain": [0, 3, 4, 20, 26], "explic": 3, "explicit": [5, 7, 8, 21, 24, 25], "explicitli": [0, 2, 3, 4, 5, 7, 14, 17, 18, 24, 25], "explor": 4, "expon": 2, "exponenti": 2, "export": [0, 3, 7, 10, 13], "export_path": 15, "expos": 26, "express": [0, 1, 2, 4, 7, 17, 24, 25], "extend": [0, 2], "extens": [0, 2, 7, 8, 22, 25], "extent": [0, 2, 4, 7, 9, 12, 20, 24, 26], "extern": [0, 2], "extra": [7, 11, 14, 19, 21], "extract": [0, 2, 4, 7, 13, 19, 26], "extract_udf_depend": [0, 7, 25], "extrema": [2, 23], "ey": [3, 5, 17, 21, 25], "f": [0, 4, 11, 15, 16, 26], "f9f4e3d3": 5, "fact": [5, 6, 15, 17, 25], "factor": [0, 2, 13, 24, 25], "factori": [0, 7], "fahrenheit": [16, 26], "fahrenheit_param": 26, "fahrenheit_to_celsiu": [16, 26], "fahrenheittocelsius1": 26, "fail": [0, 2, 5, 7, 18, 25], "failur": [3, 7], "fairli": [4, 25], "fall": [0, 3, 7, 24, 25], "fallback": [0, 3, 7, 11], "fals": [0, 2, 7, 14, 25], "fanci": [24, 25, 26], "fancy_load_collect": 26, "fancy_upsample_funct": 25, "fancyeo": 25, "far": [3, 4], "fashion": [24, 25], "faster": 6, "favor": [0, 7], "fe79": 5, "feasibl": 3, "featur": [0, 2, 3, 4, 7, 8, 
9, 11, 12, 13, 15, 16, 17, 19, 22, 24, 25, 26], "feature_collect": 22, "feature_collection_list": 0, "feature_flag": 24, "featurecollect": [0, 2, 4, 22], "feb": [0, 2], "februari": [0, 2], "fedcba": [0, 2], "feder": 7, "feedback": 11, "feel": [5, 6, 19, 20, 21], "fetch": [0, 3, 5], "fetch_metadata": [0, 7], "few": 9, "fewer": 2, "field": [0, 2, 4, 7, 11, 25, 26], "figur": [6, 25], "file": [0, 2, 4, 5, 7, 11, 12, 13, 15, 16, 17, 18, 20, 21, 25], "file_format": 0, "file_list": 0, "filenam": [0, 7, 8, 15], "fill": [0, 2, 11, 14, 25], "filter": [0, 2, 4, 7, 9, 11, 20, 25], "filter_band": [0, 2, 7, 17, 23, 24], "filter_bbox": [0, 2, 7, 9, 17, 23, 24], "filter_label": [0, 2, 7, 23], "filter_spati": [0, 2, 7, 13, 23], "filter_tempor": [0, 2, 7, 9, 17, 18, 23, 24, 26], "filter_vector": [0, 2, 7], "final": [4, 9, 11, 17, 19, 22, 26], "find": [0, 2, 4, 11, 19, 20, 21, 25], "fine": [7, 14, 18, 19, 26], "finer": 17, "finetun": 24, "finish": [0, 3, 7, 11, 19, 22, 25], "finit": [0, 7], "firewal": 7, "first": [0, 2, 3, 4, 6, 7, 8, 9, 11, 12, 15, 17, 19, 20, 21, 22, 23, 24], "fit": [0, 2, 17], "fit_": 0, "fit_class_random_forest": [0, 2, 7, 22, 23], "fit_curv": [0, 2, 7, 23], "fit_regr_random_forest": [0, 2, 7, 22, 23], "fix": [2, 19, 25], "fixtur": 0, "flag": [0, 2, 7, 11, 16, 19], "flat": [0, 7, 16, 18], "flat_graph": [0, 15], "flatgraphablemixin": 0, "flatten": [0, 7], "flatten_dimens": [0, 2, 7, 23], "flawlessli": 2, "flesh": 4, "flexibl": [0, 11, 24], "flight": 7, "flip": 25, "float": [0, 2, 11, 25, 26], "float32": 12, "float64": 12, "floor": [2, 23], "flow": [0, 7, 20, 22, 24], "fmt": [0, 25], "fncy": 25, "focal": 0, "focu": [6, 22, 25], "focuss": [8, 26], "folder": [0, 2, 5, 7, 8, 11, 12, 19], "follow": [0, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26], "font": 0, "fontsiz": 0, "foo": [0, 11], "forbidden": 7, "forc": [0, 9], "forest": [0, 2, 20], "forg": [7, 19, 21], "fork": 19, "form": [3, 7, 11, 17], "format": [0, 2, 4, 5, 6, 7, 9, 11, 13, 
14, 15, 17, 18, 19, 25, 26], "format_opt": 0, "formatt": 19, "formatunsuit": 2, "former": [3, 22], "formula": [0, 4, 14], "forum": [7, 19, 21], "forward": 3, "found": [0, 2, 25], "four": [0, 2], "fourth": [0, 2], "fraction": [0, 2, 7], "frame": 11, "framework": 19, "free": [5, 17, 21, 24], "freedom": 25, "frequenc": [2, 5], "fresh": 21, "freshli": 19, "friendli": [7, 17], "friendlier": 7, "from": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 19, 20, 21, 22], "from_dict": 0, "from_fil": [0, 25], "from_flat_graph": 0, "from_metadata": 0, "from_netcdf_fil": 7, "from_nod": [4, 18, 26], "from_paramet": [7, 18, 24, 25, 26], "from_sequ": 0, "from_url": 0, "from_user_input": 0, "full": [0, 7, 9, 13, 25], "full_width_half_max": 12, "fulldataframejobdatabas": 7, "fulli": [0, 2, 4, 5, 11, 16, 19, 25], "function": [0, 1, 4, 6, 7, 8, 11, 12, 14, 15, 20, 21, 22, 23, 24], "fundament": 4, "further": [2, 3, 4, 5, 13, 18, 26], "fuse": 0, "fusion": 4, "futur": 25, "g": [0, 1, 2, 3, 4, 5, 6, 7, 11, 13, 14, 15, 17, 19, 21, 22, 24, 25, 26], "gamma0": [0, 2], "gap": 25, "gatewai": [4, 7], "gaussian": [0, 2], "gdal": 2, "gdalwarp": 2, "ge": 23, "gener": [2, 4, 5, 7, 8, 10, 12, 17, 20, 25, 26], "geo": [7, 24], "geodatafram": 25, "geojson": [0, 2, 4, 7, 11, 13, 15, 17, 22, 25, 26], "geometri": [0, 2, 4, 5, 7, 10, 15, 17, 22, 24, 25, 26], "geometriesoverlap": 0, "geometry_count": 2, "geometrycollect": 2, "geopanda": [11, 21, 25], "geoparquet": [0, 17], "geopyspark": [4, 7], "geosjon": 25, "geospati": 21, "geotiff": [0, 4, 5, 7, 9, 12, 17, 21], "geotrelli": [12, 25], "get": [0, 2, 3, 5, 6, 7, 11, 15, 17, 19, 20, 22, 24, 25, 26], "get_arrai": [0, 25], "get_asset": [0, 5, 9], "get_by_statu": 11, "get_datacube_list": 0, "get_error_log_path": 11, "get_feature_collection_list": 0, "get_fil": 0, "get_job_dir": 11, "get_job_metadata_path": 11, "get_metadata": [0, 5], "get_path": 0, "get_result": [0, 5, 7, 9], "get_results_metadata_url": [0, 7], "get_run_udf_callback": 0, 
"get_structured_data_list": 0, "getitem": 12, "getter": 11, "gfedcb": [0, 2], "gi": 22, "git": [12, 19, 21], "github": [0, 3, 7, 9, 12, 16, 17, 19, 21, 25], "give": [0, 4, 5, 6, 9, 17, 20, 21, 24, 25], "given": [0, 2, 3, 5, 7, 11, 14, 16, 17, 18, 24, 25, 26], "gl": 3, "glitch": 0, "glob": 0, "glob_pattern": 0, "global": 19, "glue": 25, "go": [4, 16, 19], "goal": [21, 25], "goe": 4, "golai": 25, "good": [17, 19, 25, 26], "googl": [3, 17], "got": 7, "gradual": [17, 19], "grain": 7, "grant": [3, 7], "granular": 17, "graph": [1, 2, 4, 5, 7, 10, 12, 16, 17, 20, 24, 25, 26], "graph_add_nod": 0, "graph_build": [0, 24], "graphic": [4, 5, 25], "gravit": 6, "grd": 9, "great": 25, "greater": [0, 2, 7], "greatli": [11, 19], "green": 12, "grid": [0, 2], "ground": [0, 2, 4], "group": [0, 2, 13, 18], "grown": 0, "gsd": 12, "gt": [2, 23], "gte": [2, 23], "gtiff": [0, 5, 9, 13, 22, 26], "guarante": [12, 25], "guess": [0, 2, 7, 14], "guid": [6, 14], "guidelin": [6, 25], "gz": 25, "h": [0, 3, 19], "h5netcdf": [7, 21], "ha": [0, 1, 2, 3, 4, 5, 6, 7, 9, 11, 14, 15, 17, 19, 22, 25], "hack": 7, "had": [0, 2, 5, 19], "half": 17, "handi": [3, 4, 17], "handl": [0, 2, 3, 4, 5, 7, 10, 12, 14, 15, 20, 24], "hang": 19, "happen": [4, 21], "hard": [3, 6, 9, 17], "hardcod": [7, 18, 26], "harder": 7, "hash": 25, "have": [0, 2, 3, 4, 5, 7, 9, 11, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 25, 26], "haven": 14, "haze": 2, "header": [0, 3, 7], "heavi": 22, "heavier": 5, "heavili": [3, 4, 21], "height": [0, 2, 7], "help": [0, 1, 2, 3, 5, 25], "helper": [0, 1, 4, 5, 7, 8, 11, 14, 20, 21, 24, 26], "henc": [9, 17], "here": [0, 2, 3, 4, 6, 13, 16, 17, 18, 19, 22, 24, 25, 26], "hgfedc": [0, 2], "hhhhhh": [0, 2], "hidden": [7, 24], "hide": [0, 7, 26], "hierarchi": [0, 2, 7], "high": [6, 20], "high_resolution_band": 0, "higher": [0, 2, 25], "highest": 0, "highlight": [4, 6, 25], "hint": [1, 2], "histogram": [0, 7], "histori": 19, "hit": [4, 8, 17], "hoc": [6, 19], "home": [3, 8], "homebrew": 19, "homogen": 0, 
"honor": 0, "hook": 19, "horizont": [0, 2, 6], "host": [2, 19], "hour": [0, 2, 3, 4], "how": [0, 2, 3, 4, 5, 6, 11, 17, 19, 21, 24, 25, 26], "howev": [1, 2, 3, 4, 5, 8, 15, 16, 19, 22, 24, 25], "href": [0, 5, 16], "html": [0, 2, 7, 19], "http": [0, 2, 4, 5, 7, 9, 11, 12, 13, 15, 16, 17, 18, 19, 20, 24, 25], "hub": [4, 9, 17], "human": [0, 6], "hundr": 2, "hyperbol": 2, "hypercub": 0, "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26], "ic": 2, "icor": 9, "id": [0, 1, 2, 3, 4, 5, 7, 11, 12, 14, 16, 17, 18, 22, 24, 25, 26], "idea": [17, 25], "ideal": [3, 25], "ident": 3, "identifi": [0, 2, 3, 4, 5, 11], "if_": [2, 23], "ignor": [2, 7, 24], "ignore_nodata": 2, "illumin": 2, "illustr": [3, 4, 16, 17, 25, 26], "imag": [0, 2, 4, 5, 24, 25], "imagecollect": [0, 7], "imagecollectioncli": 7, "imageri": 2, "imagin": 18, "immedi": 2, "impact": [4, 17, 25], "implement": [0, 1, 2, 4, 7, 10, 11, 12, 13, 18, 21, 22, 24, 25], "impli": [0, 16], "implicit": [2, 7], "implicitli": 2, "import": [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 15, 16, 17, 18, 20, 21, 24, 25, 26], "imposs": [3, 6, 9], "impract": 13, "impress": 25, "improv": [0, 2, 6, 7, 11, 17, 25], "imshow": 12, "inaccuraci": 2, "incid": [0, 2], "includ": [0, 2, 3, 5, 7, 11, 13, 25], "include_stac_metadata": 0, "inclus": [0, 2], "incomplet": 2, "inconsist": 25, "incorrectli": 24, "increas": [0, 2, 17], "increment": [0, 2, 18], "indefinit": 11, "indent": [0, 4, 6, 26], "independ": [0, 2], "index": [0, 2, 4, 7, 14, 20, 25], "index_dict": 14, "indic": [0, 2, 4, 7, 10, 13], "indirectli": 0, "individu": [0, 2, 4, 13, 25], "infer": [17, 25], "infinit": [0, 2], "info": [0, 2, 5, 7, 8, 11], "inform": [0, 2, 3, 4, 5, 7, 9, 11, 14, 16, 17, 19, 21, 24, 26], "infrar": [4, 14], "infrastructur": 12, "inher": 2, "ini": [3, 8], "init_pixel_size_i": 25, "init_pixel_size_x": 25, "initi": [0, 2, 3, 7, 11, 20, 25], "initialize_from_df": [7, 11], "inject": [19, 26], "inlin": [0, 4, 11, 24, 25], "inner": 2, 
"input": [0, 2, 4, 7, 9, 13, 14, 24, 25, 26], "input_max": [0, 7], "input_metadata": 25, "input_min": [0, 7], "input_rang": 14, "inputmax": [0, 2], "inputmin": [0, 2], "inputs_cub": 25, "insensit": [0, 2], "insert": [2, 24], "insid": [0, 1, 2, 7, 16, 17, 22, 25], "inspect": [0, 2, 4, 5, 7, 23, 24, 25, 26], "inspir": 20, "instal": [3, 7, 10, 20, 25], "instanc": [0, 1, 2, 4, 5, 7, 13, 17, 18, 22, 24, 25, 26], "instant": [2, 17, 25], "instanti": 11, "instead": [0, 1, 2, 3, 4, 7, 14, 15, 17, 18, 19, 22, 24, 25, 26], "institut": [3, 12], "instruct": [5, 15, 25], "instrument": 0, "int": [0, 2, 9, 11, 23, 25], "int32": 12, "integ": [0, 2, 7, 25, 26], "integr": [0, 4, 7, 21], "intend": [0, 1, 2, 3, 15, 24], "intens": 5, "intention": [3, 19], "interact": [0, 1, 2, 4, 5, 7, 8, 15, 20, 21, 25], "intercept": 3, "interest": [3, 4, 5, 17], "interesting_rdd_id": 25, "interfac": [2, 11, 20], "intermedi": [17, 18], "intern": [2, 6, 7, 15, 19, 24], "interoper": [0, 7, 15], "interpol": 2, "interpolate_na": 25, "interpret": 0, "interrupt": [7, 11], "intersect": [0, 2], "interv": [0, 2, 3, 26], "introduc": [7, 19, 21, 25], "intrus": 19, "intuit": [11, 24], "invalid": [0, 2, 7, 15, 19], "invalidtimeseriesexcept": 0, "invalidvalu": 0, "invers": 2, "invert": [0, 2], "investig": 5, "invit": 6, "invoc": 0, "invok": [7, 9, 11, 24, 25, 26], "involv": [3, 7], "inward": 2, "io": 7, "ipyleaflet": 0, "irrelev": 6, "is_infinit": [2, 23], "is_nan": [2, 23, 24], "is_nodata": [2, 23], "is_valid": [0, 2, 7, 23, 24], "isol": 25, "issu": [0, 3, 7, 15, 19, 21, 24], "issuer": 3, "item": [0, 2, 5, 7, 10, 18, 22, 24, 26], "item_asset": 7, "item_schema": 0, "iter": [0, 5, 11, 24], "its": [0, 2, 3, 4, 5, 6, 7, 11, 12, 14, 16, 17, 21, 26], "itself": [0, 2, 3, 7, 11, 19, 24], "j0hn123": 3, "j9a7k2": 5, "januari": 2, "jenkin": 19, "jep": 25, "jja": 2, "job": [1, 2, 3, 7, 9, 10, 13, 15, 18, 20, 22, 25, 26], "job_db": 11, "job_id": [0, 5, 7, 11, 22], "job_list": 0, "job_log": [0, 7], "job_manag": 11, "job_opt": 
[0, 7, 25], "job_result": [0, 7], "job_start": 11, "jobdatabaseinterfac": [7, 10, 11], "joblogentri": 7, "jobresult": [0, 5, 7], "jobs_df": 11, "john": [0, 3], "johndo": 16, "join": 0, "json": [0, 3, 4, 5, 7, 10, 11, 12, 16, 19, 20, 24, 25, 26], "jsonbin": 15, "juli": 16, "june": [0, 2, 12], "jupyt": [4, 7, 12, 15, 17, 20, 21, 25], "just": [0, 2, 3, 4, 5, 6, 7, 14, 15, 17, 18, 19, 22, 24, 25, 26], "kcachegrind": 25, "keep": [0, 1, 2, 3, 4, 5, 6, 7, 17, 19, 21, 24, 25], "kei": [0, 2, 11, 25], "kept": [0, 3], "kernel": [0, 2, 18, 26], "kerneldimensionsuneven": 2, "keyword": [0, 7, 24], "kind": [0, 1, 2, 3, 18, 19, 21, 24, 25], "klnx": 3, "know": [3, 5, 19, 25], "knowledg": [15, 24, 25], "known": [0, 4, 17], "kwarg": [0, 7, 11, 24], "l19": 12, "l1c": 9, "l26": 12, "l2a": [12, 13], "lab": 20, "label": [0, 2, 25], "label_separ": [0, 2], "labelexist": 2, "labelnotavail": 2, "labelsnotenumer": 2, "lack": 7, "laid": 26, "lambda": [0, 1, 2, 4, 7, 17, 20, 24, 25], "lanczo": 2, "land": [0, 2, 7, 22], "landsat8": [7, 14], "languag": [6, 17, 24], "larg": [0, 2, 7, 13, 20, 25], "larger": [0, 3, 4, 5, 11, 13, 19, 24], "largest": 2, "last": [0, 2, 4, 5, 17, 23, 24, 25], "lat": 17, "later": [0, 2, 4, 17], "latest": [0, 7, 9, 19, 21], "latitud": [0, 17], "latter": [2, 3, 8, 22], "layer": [3, 4], "lazi": 12, "lazili": 7, "lc": 18, "le": 23, "lead": [0, 2, 3, 6, 15, 17], "leaf": 0, "leap": 2, "learn": [0, 2, 7, 20], "least": [0, 2, 7, 15, 18, 19, 24, 25, 26], "leav": [0, 11], "left": [0, 2, 7], "leftov": 7, "legaci": [0, 5, 7, 10, 25, 26], "legend": 0, "length": [2, 20], "less": [0, 2, 7], "let": [1, 2, 3, 4, 11, 12, 18, 19, 22, 26], "level": [2, 5, 7, 11, 17, 19, 20, 25], "levelnam": 11, "leverag": [0, 4, 11, 14, 19, 24], "librari": [0, 3, 4, 5, 6, 7, 8, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], "licens": 17, "life": 5, "lifetim": 3, "like": [0, 1, 2, 3, 4, 5, 6, 7, 9, 11, 14, 15, 16, 17, 18, 21, 22, 24, 25, 26], "likewis": [3, 24, 26], "limit": [0, 2, 3, 4, 7, 11, 17, 24, 
25], "line": [0, 2, 3, 7, 19, 20, 21, 25], "linear": [0, 2], "linear_scale_rang": [0, 2, 7, 23], "link": [0, 2, 4, 7, 11, 16, 17, 24], "linspac": 25, "linter": 19, "linux": [3, 19, 25], "list": [0, 2, 4, 6, 7, 11, 14, 16, 17, 18, 19, 20, 24, 25, 26], "list_collect": [0, 12, 17], "list_collection_id": [0, 4, 17], "list_fil": [0, 7], "list_file_format": [0, 17], "list_file_typ": 0, "list_indic": [10, 14], "list_job": [0, 5, 7], "list_output_format": 0, "list_process": [0, 24], "list_processgraph": 7, "list_result": 0, "list_servic": [0, 7], "list_service_typ": 0, "list_udf_runtim": [0, 25], "list_user_defined_process": 0, "liter": 24, "live": [3, 19], "ll": [0, 4, 25, 26], "ln": [0, 2, 7, 23], "load": [0, 2, 3, 7, 8, 9, 10, 11, 12, 15, 18, 20, 21, 22, 24, 25, 26], "load_byt": 0, "load_collect": [0, 2, 4, 5, 6, 7, 9, 11, 12, 13, 14, 15, 17, 20, 22, 23, 24, 25, 26], "load_dataset": 21, "load_disk_collect": [0, 7], "load_disk_data": 0, "load_geojson": [0, 2, 7, 23], "load_json": [0, 5], "load_json_resourc": 0, "load_ml_model": [0, 2, 7, 22, 23], "load_my_vector_cub": 24, "load_result": [0, 2, 7, 23], "load_stac": [0, 2, 7, 12, 23, 26], "load_stac_from_job": [0, 7], "load_uploaded_fil": [0, 2, 7, 23], "load_url": [0, 2, 7, 15], "loadcollection1": [4, 15, 26], "loaiza": 14, "local": [0, 2, 3, 4, 7, 10, 11, 15, 16, 18, 19, 20], "local_collect": 12, "local_conn": 12, "local_data_fold": 12, "local_incidence_angl": [0, 2], "localconnect": [7, 12], "localprocess": [7, 12], "locat": [0, 3, 4, 13, 24], "log": [2, 4, 7, 11, 20, 23], "log001": 5, "log002": 5, "log003": 5, "log10": [0, 7, 23], "log2": [0, 7, 23], "log_level": 0, "logarithm": [0, 2, 7, 23], "logentri": [0, 7], "logic": [0, 2, 7, 25], "logical_and": [0, 23], "logical_or": [0, 23], "long": [0, 2, 5, 6, 7, 17, 20, 25], "long_nam": 14, "longer": [2, 3, 4], "longitud": [0, 17], "look": [0, 2, 4, 6, 11, 12, 13, 15, 16, 17, 24], "lookup": 7, "loop": [5, 7, 11, 24], "loosevers": 7, "lot": [4, 17, 24, 25], "lousyeo": 25, 
"low": [4, 17], "low_resolution_band": 0, "lower": [0, 2, 6, 15], "lowercas": 0, "lps22": 20, "lt": [2, 12, 23], "lte": [2, 23], "luckili": [3, 24], "m": [2, 19], "machin": [0, 2, 3, 7, 20, 25], "magic": 19, "mai": [0, 2, 4, 5, 7, 9, 11, 13, 17, 24, 25], "main": 0, "mainli": [0, 5, 15, 24, 25], "maintain": [8, 19, 25], "mainten": [7, 20, 21, 25], "major": [0, 18, 25], "make": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 17, 19, 21, 22, 25], "makefil": 19, "mam": 2, "manag": [0, 4, 5, 7, 8, 10, 20, 24], "mani": [0, 2, 4, 7, 11, 17], "manipul": [0, 2, 25], "manner": [3, 17, 25], "manual": [3, 5, 7, 8, 10, 15, 19, 25], "map": [0, 2, 7, 10, 11, 18, 19, 20, 24, 25, 26], "march": [0, 2, 17], "mark": [7, 19], "mask": [0, 2, 7, 17, 18, 20, 23, 24], "mask_polygon": [0, 2, 7, 15, 23], "mask_resampl": 4, "mask_valu": [0, 2], "masked_s2": 18, "massag": 4, "master": [17, 19], "match": [0, 2, 11, 17, 25], "math": [7, 20, 24, 25, 26], "mathemat": [4, 24, 26], "matplotlib": [0, 21], "max": [0, 2, 4, 7, 11, 20, 23, 24, 25], "max_cloud_cov": [0, 7, 17], "max_poll_interv": 0, "max_poll_tim": [0, 7], "max_tim": [0, 4, 20, 23], "max_vari": 0, "maxima": 2, "maximum": [0, 2, 4, 7, 11, 17, 20, 24], "md": 19, "mean": [0, 1, 2, 3, 4, 11, 15, 17, 18, 22, 23, 24, 25, 26], "mean_tim": [0, 23], "meant": [0, 2], "meanwhil": 3, "measur": [2, 3], "mechan": 26, "med": 2, "media": 0, "median": [0, 2, 4, 12, 23, 24], "median_tim": [0, 23], "medium": 0, "memori": [0, 7], "mention": [4, 18], "merg": [0, 2, 6, 7, 11, 18, 19], "merge_cub": [0, 2, 7, 23], "messag": [0, 2, 3, 5, 7, 11, 19, 21, 25], "meta": [19, 25, 26], "metadaa": 0, "metadata": [2, 4, 5, 7, 9, 11, 12, 14, 16, 17, 19, 20, 26], "metadata_from_stac": 7, "meter": [0, 2, 4], "method": [0, 1, 2, 4, 5, 6, 7, 9, 11, 13, 15, 16, 17, 18, 20, 22, 23, 25, 26], "metric": 0, "micromet": 2, "microsecond": 7, "microsoft": 3, "midnight": [2, 17], "might": [0, 2, 3, 6, 14, 17, 19, 21, 25, 26], "migrat": 7, "millisecond": 2, "min": [0, 2, 23, 25], "min_tim": [0, 23], 
"mind": 4, "minim": [6, 7, 26], "minima": 2, "minimum": [0, 2, 7, 13, 17], "minor": [7, 19], "minu": 2, "minuend": 2, "minut": [2, 3, 4, 5, 7], "mirror": [0, 2], "miscellan": [10, 20], "mislead": 0, "mismatch": 7, "miss": [2, 7, 26], "mistak": [7, 25], "mistakenli": [7, 24], "mix": [1, 2, 7, 18, 24], "mixin": 0, "mjjaso": 2, "ml": [0, 2, 22], "mlmodel": [7, 20, 22, 23], "mm": [2, 17], "mobil": 3, "mod": [2, 23], "mode": [0, 2, 7, 8, 19, 21], "model": [0, 2, 7, 13, 22, 25], "modif": 2, "modifi": [0, 2, 12], "modu": 25, "modul": [0, 2, 6, 7, 20, 21, 24, 26], "modulenotfounderror": 21, "modulo": 2, "moment": [8, 19], "monitor": [4, 5, 6], "montero": 14, "month": [0, 2, 7, 12], "monthli": [2, 3], "more": [0, 1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 24, 25], "moreov": [5, 15, 18, 24, 25], "mortem": 7, "most": [2, 3, 4, 5, 9, 11, 12, 13, 14, 17, 18, 19, 24, 25, 26], "mostli": [0, 3, 8], "mother": 6, "move": [0, 7], "msphinx": 19, "much": [1, 2, 3, 4, 13, 17, 20, 25, 26], "multi": [0, 2, 10, 18, 20, 26], "multi_result": [0, 18], "multibackendjobmanag": [7, 10, 11, 21], "multilevel": 0, "multipl": [0, 2, 3, 5, 6, 7, 11, 13, 14, 17, 20, 24, 25], "multipli": [0, 2, 18, 23], "multiplicand": 2, "multiply1": 4, "multiply3": 4, "multipoint": 2, "multipolygon": [0, 2], "multiresult": [7, 18, 20], "must": [0, 2, 8, 18, 25], "my": [0, 3, 15, 25], "my_bbox": 0, "my_process": [11, 25], "my_reduc": [1, 2], "my_udf": 25, "my_udp": 18, "myclient": 0, "n": [0, 2, 5, 7, 12, 14, 19], "nadir": 2, "naiv": 15, "name": [0, 2, 3, 5, 7, 11, 12, 14, 17, 18, 19, 20, 21, 24, 26], "namespac": [0, 7, 10, 11], "nan": [2, 23], "nativ": [2, 17], "natur": [2, 25], "nc": [12, 25], "ndarrai": [0, 12], "ndgi": 7, "ndim": 25, "ndjfma": 2, "ndmi": [7, 14], "ndvi": [0, 2, 4, 5, 7, 12, 14, 20, 23, 24], "ndvi_10m": 20, "ndvi_median": 12, "ndwi": 4, "nearest": [0, 2], "necessari": [0, 3, 5, 7, 14, 18, 19, 21, 22, 25], "necessarili": [0, 13, 25], "need": [0, 2, 3, 4, 5, 9, 11, 17, 19, 21, 
25, 26], "neg": 2, "neighbor": [0, 2], "neighborhood": [2, 25], "neighbour": 2, "neighbourhood": 0, "neq": [2, 7, 23], "nest": [0, 2, 7], "net": 3, "netcdf": [0, 4, 7, 12, 13, 17, 21, 25], "netcdf4": 21, "network": [0, 3], "networkx": 12, "never": [0, 1, 2, 3, 25], "new": [0, 1, 2, 3, 4, 7, 11, 12, 14, 16, 19, 24, 25], "new_metadata": 25, "newli": [0, 2, 5, 19], "newlin": 0, "next": [0, 2, 3, 4, 7, 13, 17, 25], "nice": [3, 4, 12, 17, 19], "nicer": 7, "nir": [0, 2, 4, 12, 14, 18, 26], "nnnnnn": [0, 2], "nodata": [2, 12], "nodataavail": 0, "node": [0, 7, 16, 20, 24, 25, 26], "nois": [0, 2], "noise_remov": [0, 2], "nomin": 0, "non": [0, 2, 4, 7, 14, 19, 20, 24], "none": [0, 4, 7, 11, 14, 19, 25, 26], "nor": 7, "normal": [0, 1, 2, 3, 4, 7, 8, 9, 14, 17, 19, 24], "normalize_cr": 0, "normalize_log_level": 0, "normalized_differ": [0, 2, 23], "north": [0, 4, 6, 7, 9, 12, 17, 20, 24, 25, 26], "not_": [2, 23], "notabl": [7, 18], "notat": [0, 7, 24], "note": [0, 1, 2, 3, 4, 5, 8, 9, 11, 13, 15, 16, 17, 18, 19, 24, 25, 26], "notebook": [4, 5, 6, 7, 12, 17, 21, 25], "noth": 0, "notic": 17, "notori": 6, "novemb": [0, 2], "now": [2, 3, 4, 5, 7, 11, 12, 15, 18, 19, 22, 24, 25, 26], "nowadai": 6, "np": 25, "nrb": 2, "null": [0, 2, 7, 26], "num": 25, "num_tre": 0, "number": [0, 2, 4, 9, 11, 13, 16, 17, 18, 20, 21, 24, 25, 26], "numer": [0, 2], "numpi": [12, 24, 25], "o": [7, 17], "oauth": 3, "obfuscate_auth": 0, "object": [0, 2, 3, 4, 7, 11, 12, 17, 18, 20, 24, 25, 26], "observ": [0, 3, 4, 14, 17, 20, 22, 25], "obtain": [0, 2, 3, 4, 7, 19], "obvious": 14, "occasion": [3, 24], "occlus": 2, "occur": [0, 2], "octob": [0, 2], "off": [2, 8], "offer": [4, 6, 9, 17, 24], "offici": [2, 4, 7, 19, 20, 24, 26], "offlin": 25, "offset": 0, "often": [0, 2, 3, 13, 19, 24, 25, 26], "ogc": [0, 2], "ogr": 2, "oidc": [0, 4, 7, 8, 20], "oidc_auth_renew": 0, "oidc_auth_user_id_token_as_bear": 7, "oidcauthent": 0, "oidcbearerauth": 3, "oidcdevicecodepolltimeout": 7, "oidcexcept": 7, "oidcprovid": 3, 
"old": [0, 2, 7, 15, 25], "older": [0, 15, 25], "olivierhagol": 9, "omit": [0, 11, 25], "on_job_cancel": 11, "on_job_don": [7, 11], "on_job_error": 11, "onc": [2, 3, 4, 5, 17, 19, 25], "one": [0, 2, 3, 4, 6, 7, 9, 11, 12, 13, 18, 19, 21, 24, 25], "onelin": 19, "ones": [0, 2, 24], "onli": [0, 1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 24, 25, 26], "onlin": 24, "onto": 2, "op": 7, "open": [0, 2, 4, 6, 7, 12, 15, 16, 17, 19, 21, 26], "openeo": [1, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 18, 19, 21, 22, 24, 26], "openeo_auth_client_id": [0, 3], "openeo_auth_client_secret": [0, 3], "openeo_auth_method": [0, 3], "openeo_auth_provider_id": [0, 3, 7], "openeo_basemap_attribut": 7, "openeo_basemap_url": 7, "openeo_client_config": 8, "openeo_config_hom": 8, "openeo_processes_dask": 7, "openeo_udf": 7, "openeoapierror": 7, "openeoapiplainerror": 7, "openeopycli": 19, "opengeospati": 2, "openid": [0, 4, 7, 8, 20], "oper": [0, 1, 2, 3, 4, 7, 9, 13, 21, 24, 25, 26], "operand": 2, "operandi": 25, "opinion": [0, 6], "opposit": 0, "optic": [0, 2, 9, 17], "optim": [0, 2, 13, 25], "option": [0, 2, 4, 5, 7, 9, 11, 14, 18, 19, 20, 24, 25], "or_": [2, 23], "orbit": 17, "order": [0, 2, 4, 5, 7, 8, 23, 25], "orfeo": 9, "org": [0, 2, 5, 9, 17, 19], "organ": [3, 21], "organis": [3, 4], "orient": [6, 24], "origin": [0, 2, 5, 7, 14, 19, 22, 24, 25], "orthorectifi": [0, 7], "oschmod": 7, "other": [0, 2, 3, 4, 5, 7, 9, 11, 15, 16, 19, 21, 22, 25, 26], "otherwis": [0, 2, 3, 4, 5, 11], "our": [4, 12, 25, 26], "out": [0, 3, 4, 5, 6, 7, 8, 11, 17, 21, 24, 25, 26], "out_format": [0, 5, 7, 13], "outdat": 7, "outer": 2, "output": [0, 2, 4, 7, 13, 14, 15, 17, 18, 19, 24, 25], "output_cub": 25, "output_fil": 11, "output_max": [0, 7], "output_min": [0, 7], "output_rang": 14, "outputfil": 0, "outputmax": [0, 2], "outputmin": [0, 2], "outsid": [0, 2, 3], "outward": 2, "over": [0, 2, 4, 5, 6, 7, 25], "overal": [2, 9], "overhead": [7, 25], "overlap": [0, 2, 17, 25], "overlap_resolv": [0, 2], 
"overli": 2, "overrid": [0, 11], "overridden": 11, "overrul": 0, "oversampl": 0, "overview": [5, 19], "overwrit": [0, 6], "own": [5, 6, 11, 25, 26], "owner": 0, "ozon": 2, "p": [0, 2, 25], "p1": 18, "packag": [7, 11, 19, 21, 25], "pad": [0, 2], "page": [0, 3, 4, 7, 19, 20, 21], "pagin": [0, 7], "pair": [0, 2, 17], "panda": [0, 4, 11, 25], "pansharpen": 0, "parallel": 11, "parallel_job": 11, "paramet": [0, 2, 7, 9, 11, 14, 16, 18, 20, 24, 25], "parameter": [0, 7, 10, 24], "parameter_column_map": 11, "parameter_default": 11, "parametr": [2, 9], "parcel": [13, 17], "parent": [0, 2, 24], "parenthesi": 6, "parquet": [7, 11, 17, 21], "parquetjobdatabas": [7, 10, 11], "pars": [0, 7, 12, 25], "parse_d": 7, "parse_date_or_datetim": 7, "parse_datetim": 7, "parser": 12, "part": [0, 2, 3, 4, 7, 19, 24, 25], "parti": 12, "partial": [0, 7, 11], "particular": [0, 3, 17, 19, 25], "pass": [0, 2, 3, 7, 11, 15, 17, 18, 19, 22, 25, 26], "passphras": 0, "password": [0, 3], "past": [3, 19, 25], "path": [0, 2, 3, 7, 8, 11, 15, 16, 18, 19, 25], "pathlib": [0, 15, 25], "pattern": [0, 2, 4, 6, 7], "payload": 17, "pd": [4, 11], "peek": 4, "penalti": 25, "peopl": [3, 6, 22], "pep": [0, 7, 25], "pep8": 6, "per": [0, 2, 11, 13, 25], "percentag": 0, "perform": [0, 2, 4, 9, 10, 12, 17, 24, 25], "period": [0, 2, 3, 4, 5], "perm": 3, "permiss": [3, 4, 7, 19], "permissionerror": 7, "permut": 2, "persist": [0, 7, 11, 18], "person": [3, 6, 19], "pg": [0, 12, 26], "pgnode": [0, 2, 7], "pgnodegraphunflatten": 7, "phenologi": 0, "phone": 3, "physic": [2, 4, 20, 25], "pi": [2, 23], "pick": [3, 6, 11], "piggyback": 19, "pip": [12, 19, 25], "pipelin": [4, 24], "pipx": 19, "pitfal": 17, "pixel": [0, 1, 2, 4, 13, 20, 22, 24], "pkce": [0, 7], "place": [0, 4, 6, 8, 19, 24, 25], "placehold": [0, 1, 2, 19], "plai": [0, 3, 6, 18, 24, 25], "plain": 7, "plan": [0, 7, 19], "platform": [0, 7, 12, 14, 19], "pleas": [0, 2, 12, 25, 26], "plenti": 6, "plot": [0, 7, 12, 21], "plu": 0, "plugin": 19, "plural": 5, "point": 
[0, 2, 3, 4, 5, 13, 17, 18, 22, 26], "pointer": 25, "polici": 17, "poll": [0, 3, 5, 7, 11], "poll_sleep": 11, "pollut": 21, "polygon": [0, 2, 4, 5, 7, 13, 17, 26], "polygonal_histogram_timeseri": [0, 7], "polygonal_mean_timeseri": [0, 7], "polygonal_median_timeseri": [0, 7], "polygonal_standarddeviation_timeseri": [0, 7], "popular": 6, "portabl": [0, 2], "posit": [0, 2, 7], "possibl": [0, 4, 5, 8, 9, 12, 13, 14, 15, 16, 17, 18, 21, 24, 25, 26], "possibli": [0, 7], "post": [0, 5, 7, 19], "postprocess": 25, "potenti": 0, "power": [0, 2, 23, 24], "pq": 17, "pr": 19, "practic": [0, 7, 11, 20, 25], "pre": [0, 2, 7, 17, 20, 25, 26], "preced": 25, "precis": [1, 2], "predefin": [0, 7, 24], "predic": [0, 2], "predict": [0, 2, 22, 25], "predict_": 0, "predict_curv": [0, 2, 7, 23], "predict_random_forest": [0, 2, 7, 22, 23], "predicted_arrai": 25, "predicted_cub": 25, "predictor": [0, 22], "prefer": [5, 19, 24, 25, 26], "prefix": [0, 25], "prepar": [19, 25, 26], "prepend": 25, "preprocess": [0, 4, 9, 17], "prescrib": 24, "present": [2, 17, 25], "preserv": [0, 2, 7, 25], "press": 3, "pretti": 5, "prevent": [0, 7], "preview": [0, 7, 21], "previou": [2, 4, 12, 17, 25], "previous": [0, 4, 6], "primari": 25, "primit": 26, "principl": [2, 6, 15, 17], "print": [0, 3, 4, 7, 8, 15, 16, 19, 21, 24, 26], "print_json": [0, 7, 15, 26], "print_stat": 25, "prior": 25, "prioriti": [0, 2], "privaci": 3, "privat": [3, 16], "privatejsonfil": 7, "probabl": [0, 2, 4], "probe": 8, "problem": [2, 17, 19, 24, 25], "procedur": [0, 3, 24], "process": [1, 4, 5, 7, 9, 10, 13, 17, 19, 20, 22], "process_graph": [0, 4, 15, 18, 26], "process_id": [0, 4, 11, 15, 16, 18, 24, 26], "process_map": 19, "process_with_nod": [0, 7], "processbasedjobcr": [7, 10], "processbuild": [0, 1, 7, 20, 23, 24], "processbuilderbas": 0, "processes_dict": 0, "processgraphunflatten": 7, "processgraphvisitexcept": [7, 18], "processgraphvisitor": 7, "produc": [0, 11, 18, 25], "product": [0, 2, 9, 17, 23], "product_uri": 12, 
"profil": [3, 6, 7, 20], "profile_dump": 25, "program": [2, 6, 24], "programmat": [4, 5, 17], "progress": [0, 4, 5, 7], "proj": 0, "project": [0, 2, 6, 7, 12, 13, 14, 17, 19, 21, 26], "prolept": [0, 2], "propag": 0, "proper": [7, 25], "properli": [0, 3, 4, 5, 7, 14, 17, 19, 21, 24, 26], "properti": [0, 2, 4, 5, 7, 12, 13, 16, 20, 22, 25, 26], "propos": [2, 7, 19], "proprietari": [0, 2], "protect": 3, "protocol": [3, 7], "provid": [0, 2, 3, 4, 5, 7, 9, 11, 12, 14, 15, 16, 17, 18, 20, 22, 24, 25, 26], "provider_id": [0, 3], "pry": 3, "pseudocod": 24, "pstat": 25, "public": [10, 12, 13, 17], "publicli": [3, 4, 10, 26], "publish": [10, 18, 19], "pull": 0, "pure": [19, 21], "pureposixpath": 0, "purpos": [0, 2, 12, 17, 25], "push": 19, "put": [0, 2, 3, 4, 5, 6, 25], "px": 25, "py": [0, 7, 12, 19, 25], "py3": [19, 25], "pyarrow": [11, 21], "pypi": [19, 21], "pyprof2calltre": 25, "pyproj": [0, 7], "pystac": 7, "pytest": [0, 19], "python": [0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26], "q": 2, "q1": 2, "q3": 2, "q7znsy": 3, "quadrat": 2, "quantil": [2, 23], "quartil": 2, "queri": [4, 17, 24], "question": 17, "queu": [0, 5], "quick": [4, 6, 25], "quit": 3, "quot": [15, 25], "r": [2, 14, 25, 26], "r8dh": 22, "radar": [0, 2, 9], "radian": 2, "radiometr": [0, 2], "rainbow": 0, "rais": [0, 2, 3, 5, 7, 11, 24, 25], "random": [0, 2, 20], "rang": [0, 2, 4, 14, 17, 25, 26], "rare": [0, 19], "raster": [0, 2, 5, 12, 13, 22, 24, 25], "raster_cub": [0, 26], "raster_to_vector": 0, "rasterspec": 12, "rather": [13, 25], "raw": [0, 4, 7, 9, 10, 13, 16, 17, 18, 22, 25, 26], "raw_json": 18, "rc1": 7, "rdd": 25, "rdd_": 25, "rdylbu_r": 0, "re": [0, 2, 3, 7, 19, 21, 24, 26], "reach": [2, 5, 21], "read": [0, 2, 6, 7, 11, 12, 17, 21], "read_text": 25, "read_vector": [0, 7, 10], "readabl": [0, 6, 7, 25], "reader": 6, "readi": [10, 20], "readili": [18, 25], "real": [4, 18], "realiti": 24, "realm": 4, "rearrang": [2, 23], "reason": [0, 3, 6, 17, 24, 25], "rebuild": 19, 
"receiv": [0, 2, 3, 5, 24, 25], "recent": [19, 25], "recip": 19, "recogn": 14, "recommend": [0, 2, 3, 4, 5, 7, 13, 17, 19, 20, 21, 25], "reconnect": 4, "reconstruct": 15, "rectangular": 2, "red": [0, 2, 4, 12, 14, 18, 26], "redact": 3, "redirect": 7, "reduc": [0, 1, 2, 3, 4, 7, 12, 15, 17, 22, 24, 26], "reduce_band": 0, "reduce_bands_udf": 0, "reduce_dimens": [0, 1, 2, 7, 12, 20, 22, 23, 24], "reduce_spati": [0, 2, 7, 23], "reduce_tempor": [0, 7], "reduce_temporal_simpl": [0, 7], "reduce_temporal_udf": 0, "reduce_tiles_over_tim": 0, "reduct": [0, 2], "ref": 25, "refer": [0, 2, 4, 5, 6, 7, 10, 11, 17, 22, 24, 25, 26], "referenc": [0, 7], "reference_system": 25, "reflect": [0, 2, 4, 7, 25], "reflect_pixel": [0, 2], "refresh": [0, 7, 19, 20], "refresh_token": [0, 3], "refresh_token_stor": 0, "refreshtokenstor": [0, 3, 7], "regard": [2, 6, 7], "regardless": 0, "region": [4, 5, 17], "regist": [3, 11], "registr": 3, "registri": 2, "regress": [0, 2, 20], "regro": 19, "regular": [2, 3, 17, 24, 25, 26], "regularli": 11, "reinstat": 7, "reject": 2, "rel": [0, 3, 13, 16, 17], "relat": [0, 2, 3, 4, 7, 19, 21], "relativeorbitnumb": 17, "releas": [20, 21], "relev": [3, 5, 11], "reli": [4, 12], "reliabl": [5, 25], "reload": 19, "remain": [0, 2, 12, 25], "remaind": 2, "remark": 0, "rememb": [11, 13], "remot": [0, 2, 4, 7, 11, 12, 20], "remotesens": 9, "remov": [0, 2, 3, 15, 19, 25], "remove_servic": [0, 7], "renam": [0, 2, 7, 14, 19], "rename_dimens": [0, 2, 23], "rename_label": [0, 2, 7, 23, 25], "render": [4, 7, 12, 15, 17], "renew": [0, 7], "reoccur": 26, "repeat": [0, 2, 25], "repeatedli": 2, "replac": [0, 2, 7, 11, 19], "replace_invalid": [0, 2], "replic": [0, 2], "repo": 19, "report": [0, 2, 4, 19, 26], "repositori": [2, 12, 19], "repr": [7, 15], "repres": [0, 17, 24, 25, 26], "represent": [0, 2, 4, 7, 12, 15, 16, 18, 26], "reproduc": [0, 2, 17, 25], "reproject": 2, "request": [0, 2, 3, 4, 5, 7, 17, 25], "requir": [0, 2, 3, 4, 7, 9, 12, 13, 17, 19, 24, 25, 26], "res001": 5, 
"res002": 5, "resampl": [0, 2, 4], "resample_cube_spati": [0, 2, 4, 23], "resample_cube_tempor": [0, 2, 7, 23], "resample_spati": [0, 2, 23], "rescal": [4, 14, 20], "rescaled_cub": 25, "research": [2, 3], "resili": 7, "resolut": [0, 2, 7, 12], "resolution_merg": [0, 7, 23], "resolv": [0, 2, 11], "resolve_from_nod": 7, "resourc": [0, 2, 4, 5, 18, 24], "respect": [0, 2, 3, 26], "respond": 14, "respons": [0, 5, 7, 17], "responsibli": 3, "rest": [2, 3, 4, 7, 11, 13, 20, 24, 25, 26], "restart": [11, 21], "restat": 25, "restcap": 0, "restfil": 7, "restjob": [0, 5, 7], "restor": [7, 19], "restrict": [0, 2, 13], "restuserdefinedprocess": [0, 16], "result": [2, 3, 4, 7, 9, 11, 12, 13, 15, 17, 19, 20, 21, 22, 25, 26], "result_ndvi": 12, "result_nod": [0, 7, 24], "resultasset": [0, 5, 7], "resum": 11, "retain": [0, 17, 25], "retent": 17, "retriev": [0, 2, 7, 11, 17, 24, 25], "return": [0, 1, 2, 3, 5, 7, 11, 12, 14, 24, 25], "return_nodata": 2, "reus": [0, 3, 4, 15, 20], "reusabl": [7, 11, 18, 24, 25, 26], "reveal": 25, "revers": [0, 2, 7], "revert": 2, "review": 19, "rework": 7, "rfc": [0, 2, 17], "rfc3339": [0, 2, 7], "rgb": [0, 9], "rich": [0, 7], "right": [0, 2, 3, 5, 17], "rioxarrai": 21, "risk": [3, 6], "rm": [2, 19], "robust": 7, "role": [0, 19, 25], "root": [0, 2, 11, 19, 21], "root_dir": 11, "roughli": [15, 18, 19], "round": [2, 23], "row": [0, 11, 22], "rst": 19, "rtc": [0, 7], "rtol": 0, "rtype": 0, "rudimentari": 25, "rule": [6, 19, 25], "run": [0, 2, 4, 7, 9, 11, 12, 13, 18, 20, 21, 24, 25], "run_cod": [0, 7, 25], "run_job": [7, 11], "run_synchron": [0, 7], "run_udf": [0, 2, 7, 23, 25], "run_udf_cod": 7, "run_udf_extern": [2, 23], "runtim": [0, 2, 7, 25], "runtimeerror": 24, "rxpk": 24, "s1": [12, 14], "s1grd": 9, "s2": [12, 14], "s2_band": 13, "s2_cube": [12, 25], "s2_datacub": 12, "s2_fapar": 6, "s2_l2a_sampl": 12, "s2_scl": 4, "s2b_32tpr_20190102_": 12, "s2b_msil2a_20190102": 12, "s2wi": 7, "safe": 3, "sai": [3, 24, 26], "same": [0, 2, 3, 4, 5, 7, 12, 17, 19, 
21, 22, 24, 25, 26], "sampl": [0, 2, 4, 10, 12, 17, 20, 22, 25], "sample_by_featur": 13, "sample_geotiff": 12, "sample_netcdf": 12, "sandbox": 19, "sar": [0, 2, 10, 17], "sar_backscatt": [0, 2, 7, 9, 23], "satellit": [4, 7, 14], "satur": 2, "saturation_": 2, "save": [0, 2, 3, 7, 11, 15, 16, 19, 22], "save_ml_model": [0, 7, 22, 23], "save_result": [0, 2, 5, 7, 23], "save_to_fil": 0, "save_user_defined_process": [0, 7, 16, 26], "savgol_filt": 25, "savitzki": 25, "scalabl": 10, "scalar": 2, "scale": [0, 2, 10, 24, 25], "scenario": 9, "scene": 4, "schema": [0, 7, 11, 18], "scheme": 3, "scipi": [24, 25], "scl": [4, 12], "scl_band": 4, "scope": [3, 7, 17, 21], "screen": 6, "script": [0, 2, 3, 4, 5, 6, 17, 19, 21, 26], "scroll": 6, "sd": [0, 2, 4, 23], "sdw": 0, "search": [2, 12, 20, 25], "season": [0, 2], "second": [0, 2, 4, 5, 7, 11, 24], "secondari": [0, 7], "secondli": 6, "secret": [0, 3, 7, 8], "section": [3, 4, 8, 9, 17, 19, 25], "secur": [3, 4], "see": [0, 2, 3, 4, 5, 8, 11, 12, 14, 16, 17, 24, 25, 26], "seed": [0, 2], "seem": 6, "segment": [0, 2, 19], "select": [0, 2, 4, 9, 19, 20, 26], "self": [0, 4], "semant": [7, 19], "semi": 19, "sen2cor": 4, "send": [0, 2, 4, 5, 7], "send_job": [0, 7], "sens": [8, 17, 20, 24], "sensit": [2, 3], "sensor": [0, 2, 9], "sent": 18, "sentinel": [0, 4, 9, 12, 13, 17], "sentinel1": 7, "sentinel1_grd": [4, 9, 17], "sentinel2": [7, 13, 14, 22, 25], "sentinel2_cub": [4, 26], "sentinel2_l1c_sentinelhub": 9, "sentinel2_l2a": [4, 13, 14, 17, 25, 26], "sentinel2_l2a_sentinelhub": 26, "sentinel2_toc": 18, "sentinelhub": 9, "separ": [0, 2, 3, 4, 5, 7, 9, 11, 13, 17, 19, 24, 25], "septemb": [0, 2], "seq": 0, "sequenc": [0, 2], "seri": [0, 5, 11, 25], "serial": 0, "server": [0, 2, 19, 20], "server_address": [0, 3], "servic": [0, 2, 3, 7, 17, 26], "service_id": 0, "session": [0, 4, 7, 15, 25], "set": [0, 2, 3, 4, 5, 7, 8, 11, 13, 16, 20, 22, 24, 25], "set_datacube_list": 0, "set_structured_data_list": 0, "settingwithcopywarn": 7, "settl": 6, 
"setup": [19, 21], "setuptool": 19, "sever": [0, 2, 24, 25], "sgn": [2, 23], "sha1": 25, "shadow": [0, 2, 7], "shape": [0, 2, 4, 7, 12, 19, 25], "sharabl": 16, "share": [3, 7, 10, 17, 20], "shell": [3, 21], "short": [0, 2, 3, 4, 25], "shortcut": [0, 2, 7, 17], "shorthand": [0, 7], "should": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 13, 17, 18, 19, 21, 24, 25, 26], "show": [0, 3, 4, 5, 7, 9, 17, 19], "show_axeslabel": 0, "show_bandnam": 0, "show_dat": 0, "shown": [3, 5, 7, 19, 25], "shrink": 2, "side": [0, 1, 2, 4, 5, 7, 10, 15, 16, 20], "sight": [0, 2], "sigma0": [0, 2, 9], "sign": [0, 2], "signal": 25, "signatur": [2, 7, 20, 24], "signific": 9, "signum": 2, "silent": 6, "similar": [12, 24], "simpl": [0, 1, 2, 3, 4, 5, 8, 12, 14, 17, 18, 19, 20, 24, 25, 26], "simpler": 2, "simplest": [19, 25], "simpli": [0, 2, 9, 17], "simplic": 22, "simplifi": [0, 4, 7, 14, 26], "sin": [2, 23], "sinc": [0, 2, 3, 4, 5, 13, 17], "sine": 2, "singl": [0, 2, 4, 7, 11, 12, 13, 14, 15, 16, 18, 24, 25, 26], "singular": 5, "sinh": [2, 23], "site": 0, "situat": [3, 5, 24, 26], "six": [0, 2], "sixth": 2, "size": [0, 2, 3, 13, 25, 26], "size_param": 26, "skip": [0, 5, 7, 17, 19, 25], "skip_verif": 0, "sleep": [0, 11], "slice": [4, 24, 25], "slide": [0, 25], "slightli": 3, "slow": [0, 19], "slow_response_threshold": 0, "slower": 7, "sluo": 4, "smac": 9, "small": [0, 4, 6, 13, 17, 25], "smaller": [13, 25], "smallest": 2, "smooth": 20, "smooth_savitzky_golai": 25, "smoothed_arrai": 25, "smoothed_evi": 25, "smoother": 25, "smoothing_udf": 25, "snake": 0, "snippet": [0, 1, 2, 3, 4, 11, 12, 17, 25], "snow": 2, "so": [0, 2, 3, 4, 5, 9, 13, 16, 19, 21, 22, 24, 25, 26], "soft": [0, 7], "soft_error_max": 0, "softwar": 6, "solut": [3, 17, 19], "solv": [7, 24], "some": [0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 14, 16, 17, 19, 21, 24, 25, 26], "somehow": 25, "someon": 6, "someth": [0, 2, 18, 21, 25], "sometim": [19, 25, 26], "somewhat": 22, "son": 2, "sort": [0, 2, 23], "sourc": [0, 2, 3, 6, 7, 11, 14, 19, 20, 24, 25], 
"south": [0, 4, 6, 7, 9, 12, 17, 20, 24, 25, 26], "space": [6, 9, 17, 25], "span": 2, "spark": 25, "spars": 13, "spatial": [0, 2, 4, 7, 12, 13, 20, 25, 26], "spatial_ext": [0, 2, 4, 6, 7, 9, 12, 17, 20, 22, 25, 26], "spatialdimens": 0, "spatio": [4, 5, 17], "spatiotempor": [0, 25], "speak": [0, 26], "spec": [7, 12, 19, 26], "special": [0, 7, 9, 12, 24, 26], "specif": [0, 2, 3, 4, 7, 9, 12, 13, 15, 17, 19, 24, 25, 26], "specifi": [0, 2, 3, 4, 5, 7, 9, 11, 14, 17, 18, 22, 24, 25, 26], "spectral": [0, 2, 4, 7, 10, 20, 26], "spectral_indic": [7, 14], "spent": 0, "sphinx": 19, "spline": 2, "split": [0, 2, 6, 25], "sqrt": [2, 23, 24], "squar": [0, 2], "sr": 0, "src": 0, "srr3": 20, "srr5": 20, "srr6": 20, "stabl": 2, "stac": [0, 2, 5, 7, 10, 17, 22], "stac_vers": 4, "stack": [0, 12, 25], "stackstac": 12, "stage": [17, 19], "stai": 2, "standalon": 25, "standard": [0, 2, 6, 7, 14, 15, 16, 17, 19, 24], "star": 6, "start": [0, 1, 2, 3, 7, 11, 12, 18, 19, 20, 21, 22, 24, 25, 26], "start_and_wait": [0, 5, 7, 22], "start_dat": [0, 11, 17, 24], "start_job": [0, 7, 11], "start_job_thread": 11, "startswith": 5, "stat": [5, 7, 11, 25], "state": [2, 11], "statement": [1, 2, 4, 5, 6, 24], "static": [0, 2], "statist": [0, 2, 17, 20, 24, 25], "statu": [0, 4, 5, 7, 11, 16], "status": 11, "stdout": 0, "step": [0, 3, 4, 5, 9, 12, 17, 18, 19, 25], "stick": 0, "still": [0, 2, 3, 5, 6, 7, 11, 12, 13, 17, 21, 25], "stolen": 3, "stop": [0, 3, 4, 5, 7, 11, 25], "stop_job": 0, "stop_job_thread": 11, "storag": [0, 3, 7], "store": [0, 2, 3, 5, 7, 8, 11, 13, 16, 18, 20, 24], "store_refresh_token": [0, 3], "str": [0, 11, 14], "straightforward": [3, 5, 18, 24, 26], "strang": 6, "strategi": 0, "stream": 0, "streamlin": [4, 6, 19], "stretch_color": 7, "strict": [6, 7], "strictli": 2, "string": [0, 2, 4, 7, 15, 16, 18, 19, 22, 25, 26], "strip": 0, "strong": 6, "strongli": 6, "structur": [0, 2, 4, 7, 12, 18, 22, 24, 25], "structured_data": 0, "structured_data_list": 0, "structureddata": 0, "stuck": 25, 
"style": [0, 4, 7, 8, 15, 19, 20, 24, 26], "sub": [0, 2, 7, 18, 19, 24, 26], "subclass": [0, 7], "subcommand": 3, "subfold": 11, "subject": [0, 4, 8, 11, 12, 14, 15, 16, 22, 25], "submit": [0, 5, 26], "submodul": [7, 19], "subpackag": [7, 14], "subprocess": 0, "subrepo": 19, "subsect": 19, "subsequ": [0, 2, 3, 4], "subset": [0, 2, 19, 24], "subshel": 19, "substitut": 18, "subtract": [0, 2, 16, 18, 23, 24, 26], "subtract1": [4, 26], "subtract32": 26, "subtrahend": 2, "subtyp": [0, 7, 11, 26], "success": [0, 3, 8], "successfulli": [2, 3, 5, 7, 19, 22, 26], "suffici": [9, 25], "suffix": [7, 19], "sugar": [1, 2, 4, 11], "suggest": 11, "suit": [3, 19], "suitabl": [0, 2, 9, 17, 25], "sum": [2, 7, 18, 23, 24, 25], "summand": 2, "summari": [0, 7], "sun": [2, 9], "sunazimuthangl": 9, "sunzenithangl": 9, "superclass": 7, "support": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 14, 16, 17, 18, 20, 21, 24, 25, 26], "sure": [3, 4, 5, 7, 9, 17, 19, 21, 25], "surfac": [0, 2], "surpris": 19, "suspect": 3, "swir": 14, "switch": [3, 5], "sy": [0, 25], "symbol": [0, 24, 26], "synchron": [0, 5, 7, 13, 18, 20, 25, 26], "syntact": [1, 2, 4, 11], "syntax": [0, 7, 25, 26], "synthet": 9, "system": [0, 2, 3, 5, 7, 15, 17, 19, 21], "systemat": 0, "t": [0, 1, 2, 3, 4, 5, 7, 8, 12, 14, 17, 21, 22, 24, 25, 26], "tab": [5, 7], "tabl": [0, 23], "tabular": 13, "tag": [19, 25], "take": [0, 1, 2, 3, 4, 5, 7, 11, 18, 20, 24, 26], "taken": [0, 2], "tan": [2, 23], "tangent": 2, "tanh": [2, 23], "tar": 25, "target": [0, 2, 22, 25], "target_band": [0, 2, 7], "target_dimens": [0, 2, 7], "targetdimensionexist": 2, "task": [4, 24], "tast": 6, "technic": [3, 4, 6, 17, 22], "tediou": 19, "temp": 0, "templat": [7, 11], "tempor": [0, 1, 2, 4, 5, 7, 11, 12, 20, 22, 24, 25, 26], "temporal_ext": [0, 2, 4, 6, 7, 9, 11, 12, 13, 17, 20, 22, 25, 26], "temporal_interv": [0, 7, 26], "temporaldimens": 0, "temporalextentempti": [0, 2], "temporari": [0, 7, 19], "temporarili": 19, "ten": [0, 2], "term": [3, 14, 22], "terminologi": 20, 
"terrain": [0, 2, 9], "terrascop": [4, 16], "terrascope_s2_fapar_v2": 6, "terrascope_s2_ndvi_v2": 20, "test": [2, 7, 9, 11, 12, 17, 20, 21, 25], "test_10": 13, "test_data": 0, "test_input": 25, "testdata": 13, "testdataload": 0, "text": [0, 2], "text_begin": [2, 23], "text_concat": [2, 23], "text_contain": [2, 23], "text_end": [2, 23], "than": [0, 2, 3, 4, 7, 12, 13, 18, 24, 25], "thei": [0, 2, 5, 7, 15, 18, 24, 25], "them": [0, 2, 3, 5, 8, 9, 11, 14, 17, 18, 19, 24, 25], "therefor": [0, 25], "thi": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26], "thin": 0, "thing": [0, 3, 6, 25], "think": 24, "third": [0, 2, 12], "those": [0, 2, 17, 25], "thousand": 25, "thread": [7, 11], "three": [0, 2], "threshold": 0, "through": [0, 2, 3, 4, 5, 7, 8, 10, 17, 18, 19, 22, 24, 25], "throw": [0, 2, 7], "thrown": [0, 2], "thu": [2, 9], "thumb": 25, "ti": 0, "ticket": 16, "tif": 25, "tiff": [4, 5, 20, 26], "tight": 6, "tightli": [0, 9], "tile": 5, "tiled_viewing_servic": 7, "till": [0, 2], "time": [0, 1, 2, 3, 4, 5, 7, 9, 11, 12, 16, 17, 18, 21, 22, 24, 25, 26], "time_window": 26, "timeout": [0, 3, 7], "timeout_second": 11, "timeseri": [0, 1, 2, 5, 20, 24], "timeseries_json_to_panda": [0, 4], "timestamp": [0, 2, 17], "timezon": 7, "timinglogg": 7, "tip": [10, 20, 21], "titl": [0, 4, 5, 7, 11, 12, 13, 15, 16, 19], "tmp": 19, "tmp_path": 0, "to_bbox_dict": [0, 7], "to_celsiu": 16, "to_datetim": 4, "to_dict": 0, "to_fil": 0, "to_json": [0, 4, 7, 15, 26], "to_netcdf_fil": 7, "to_process_graph_argu": 0, "to_show": 0, "toa": 2, "todai": 7, "todo": 18, "togeth": [11, 24, 26], "toggl": 0, "toi": 18, "token": [0, 2, 7, 20], "tokeninvalid": 7, "toler": 0, "toml": 25, "ton": 19, "too": [0, 3, 4, 5, 7, 17, 19, 24, 26], "tool": [2, 5, 6, 7, 8, 12, 15, 17, 19, 20, 21, 25], "toolbox": 9, "toomanydimens": 2, "top": [0, 1, 2, 3, 4, 25, 26], "topic": 4, "total": 2, "total_count": 2, "touch": 2, "tr": 0, "trace": 0, "track": [0, 1, 2, 4, 6, 7, 11, 19, 21, 
24], "tracker": [11, 21], "traction": 6, "tradit": 19, "trail": 25, "train": [0, 2, 7, 13], "training_job": 22, "transfer": [13, 18], "transform": [0, 2, 12, 20, 24, 26], "translat": [1, 2, 7, 17, 24], "transpar": 25, "travi": 7, "tree": [0, 3], "tri": [0, 4, 7, 19], "triangl": 0, "trick": [10, 20], "trigger": [0, 3, 4, 7, 19, 25], "trim": [0, 2], "trim_cub": [2, 23], "trivial": 4, "tropic": [0, 2], "troubl": [21, 24], "troubleshoot": 20, "true": [0, 2, 3, 7, 13, 15, 16, 18, 25, 26], "try": [0, 3, 7, 9, 18, 19, 21, 22, 24, 25], "tune": [5, 7, 14, 18, 19, 26], "tupl": [0, 7, 17], "turn": [25, 26], "tutori": [4, 6], "tweak": [7, 19], "twice": 24, "twine": 19, "two": [0, 2, 3, 4, 7, 17, 24, 26], "type": [0, 1, 2, 3, 4, 5, 7, 11, 14, 17, 18, 22, 25, 26], "typeerror": 0, "typic": [0, 3, 5, 11, 12, 14, 17, 18, 19, 24, 25], "u": [3, 4, 16, 19], "u24": 12, "u3": 12, "u65": 12, "udf": [2, 20, 26], "udf_cod": 25, "udf_data": 0, "udf_dict": 0, "udf_modify_spati": 25, "udfdata": [0, 25], "udp": [7, 10, 11, 20, 24, 25], "udp_url": 16, "ui": 25, "ultim": 26, "unambigu": 2, "unari": 0, "unattend": 3, "unavail": 7, "unbound": [0, 2], "unchang": [0, 2, 25], "uncommit": 19, "uncommon": 8, "uncorrect": 9, "undefin": 2, "under": [0, 3, 4, 5, 19, 22], "underli": [0, 2, 9, 15], "underpin": 25, "underscor": 0, "understand": [1, 2, 3, 6, 17, 25], "uneven": 2, "unflatten": [0, 7], "unflatten_dimens": [0, 2, 7, 23], "unfortun": 17, "unhandl": 14, "unhelp": 7, "uniform": 25, "unintend": 17, "unintuit": 17, "union": [0, 2, 11], "uniqu": [0, 2], "unit": [0, 2, 7, 20, 21, 24, 25], "unitmismatch": 2, "unknown": [0, 11], "unless": [2, 19], "unlock": 21, "unmodifi": 0, "unnecessari": 0, "unnecessarili": [6, 7, 17], "unreleas": [19, 20, 21], "unrol": 16, "until": [2, 4, 5], "unus": [3, 7, 25], "unwant": 4, "unzip": 19, "unzipped_virtualenv_loc": 25, "up": [0, 2, 3, 5, 6, 7, 11, 13, 15, 17, 22, 24, 25], "updat": [0, 2, 7, 20, 25], "update_argu": [0, 7, 24], "upgrad": 21, "upload": [0, 7, 17, 19], 
"upload_fil": 0, "upper": [0, 2], "upstream": [2, 19], "urban": [7, 14], "uri": 0, "url": [0, 2, 4, 7, 8, 10, 11, 12, 15, 17, 18, 20, 22, 24, 25], "us": [0, 1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 25], "usabl": [3, 4, 7], "usag": [0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 24, 26], "use_pkc": 0, "use_pyproj": 0, "user": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 17, 18, 19, 20, 24], "user_cod": 3, "user_context": 0, "user_defined_process": 0, "user_defined_process_id": [0, 26], "user_id": 3, "user_job": 0, "userfil": [7, 20], "usernam": [0, 3], "usual": [0, 2, 3, 5, 8, 13, 17, 19, 24, 25, 26], "utc": [2, 7, 17], "utcnow": 7, "utf8": 15, "util": [7, 20], "utm": 13, "uv": 19, "ux": 20, "v0": [7, 19], "v1": [9, 12], "v2": 5, "v3": 25, "valid": [0, 2, 3, 5, 7, 9, 12, 13, 18, 25], "valid_count": 2, "valid_within": [0, 2], "validate_process_graph": 0, "valu": [0, 1, 2, 4, 5, 7, 8, 11, 15, 17, 18, 20, 22, 24, 26], "valuabl": 4, "valueerror": [0, 7], "vapour": [2, 14], "var": 7, "vari": [0, 25], "variabl": [0, 2, 4, 7, 8, 14, 24], "variable_map": 14, "varianc": [2, 23], "variant": [0, 9], "varieti": 9, "variou": [0, 3, 4, 6, 7, 8, 14, 17, 19, 25], "vectocub": 7, "vector": [0, 2, 4, 7, 13, 20, 22, 24, 25], "vector_buff": [2, 23], "vector_reproject": 2, "vector_to_random_point": [2, 23], "vector_to_rast": [0, 7], "vector_to_regular_point": [2, 23], "vectorcub": [7, 15, 18, 20, 23, 24], "vectorcube_from_path": [0, 7, 17], "veget": [0, 2, 4, 7, 14], "venv": [19, 21], "verbos": [3, 7, 8], "veri": [0, 3, 4, 6, 7, 9, 16, 17, 24, 25, 26], "verif": 0, "verifi": [3, 7, 19, 25], "versatil": 25, "version": [0, 2, 3, 5, 6, 7, 8, 9, 11, 14, 15, 17, 18, 19, 20, 21, 22, 24, 26], "version_discoveri": 0, "version_info": [0, 7], "vertic": 2, "vgt": [13, 19], "vh": 9, "via": 19, "view": [2, 6, 9], "viewazimuthmean": 9, "viewer": 17, "viewport": 6, "viewzenithmean": 9, "violat": [15, 19], "virtual": [1, 2, 19, 21, 25], "visibl": [0, 2], "visit": [3, 4, 19], "visual": [0, 5, 
7, 12, 17, 25], "visualis": 21, "vito": [4, 9, 13, 16, 19, 20], "vue": 7, "vv": 9, "w": 26, "w3": 0, "wa": [0, 2, 3, 5, 7, 11, 15, 17, 25], "wai": [0, 2, 3, 4, 5, 6, 17, 18, 19, 22, 24, 25, 26], "wait": [0, 4, 7, 11, 19], "walk": [0, 2], "walk_nod": 0, "want": [0, 1, 2, 3, 4, 5, 12, 13, 14, 17, 18, 19, 21, 24, 25, 26], "warn": [0, 5, 7, 16, 21, 25], "warp": 2, "watch": 19, "water": [2, 7, 9, 14], "water_vapor": 2, "wavelength": 2, "wd23": 22, "we": [1, 2, 3, 4, 9, 11, 12, 13, 16, 17, 18, 19, 22, 24, 25, 26], "web": [0, 3, 4, 5, 7, 9, 19, 26], "webbrowser_open": 0, "week": [0, 2], "weekli": 3, "weight": [0, 2], "welcom": [11, 19, 20], "well": [0, 1, 2, 4, 6, 7, 14, 21, 25, 26], "went": 21, "were": [7, 24, 25], "west": [0, 4, 6, 7, 9, 12, 17, 20, 24, 25, 26], "wg": 17, "wgs84": 0, "what": [0, 1, 2, 3, 5, 19, 24, 25, 26], "wheel": [19, 25], "when": [0, 1, 2, 3, 4, 5, 7, 8, 9, 11, 13, 14, 15, 16, 17, 18, 19, 22, 24, 25, 26], "whenev": 25, "where": [0, 1, 2, 3, 5, 6, 7, 8, 11, 13, 15, 18, 19, 22, 25], "whether": [0, 2, 4], "which": [0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 17, 18, 19, 21, 24, 25, 26], "while": [0, 1, 2, 3, 4, 5, 6, 7, 11, 17, 18, 19, 24, 25, 26], "whitespac": 19, "whl": [19, 25], "whole": [0, 2, 3, 7, 19, 22, 25], "whose": [0, 2], "wide": [0, 25], "wider": 6, "widget": [0, 7], "wiki": 2, "wikipedia": 2, "window": [0, 2, 4, 7, 17, 20, 25, 26], "winter": 2, "wise": 0, "wish": 24, "within": [0, 2, 7, 25], "without": [0, 2, 3, 4, 7, 8, 9, 12, 15, 18, 19, 25], "wkt2": [0, 2], "won": [3, 25], "word": [0, 24], "work": [0, 3, 4, 5, 7, 8, 9, 11, 13, 15, 17, 19, 20, 21, 25, 26], "workaround": 7, "worker": [5, 25], "workflow": [0, 3, 6, 15, 19, 21, 22, 26], "workspac": [0, 2], "world": 17, "worri": [4, 24, 26], "wors": 3, "would": [0, 2, 3, 7, 11, 15, 25], "wrap": [0, 2, 7], "wrapper": [0, 7, 26], "write": [0, 4, 7, 11, 13, 15, 16, 21, 24], "write_text": 15, "written": 0, "wrong": 26, "wrongli": 24, "wv": 14, "www": [0, 2], "x": [0, 2, 4, 7, 12, 15, 16, 17, 18, 
20, 24, 25, 26], "xarrai": [0, 7, 12, 21, 25], "xarraydatacub": [0, 7, 25], "xarrayio": 7, "xdc_dict": 0, "xdg_config_hom": 8, "xor": [2, 23], "xstep": 25, "xyz": [0, 7], "y": [0, 2, 4, 7, 12, 15, 16, 18, 24, 25, 26], "yaml": 19, "year": [0, 2, 7, 11], "yearli": 2, "yellow": 0, "yet": [0, 2, 3, 4, 5, 7, 9, 11, 24, 25], "you": [0, 1, 2, 3, 4, 5, 6, 9, 11, 12, 13, 14, 15, 17, 18, 19, 21, 22, 24, 25, 26], "your": [0, 1, 2, 3, 4, 6, 7, 9, 12, 13, 14, 15, 17, 18, 19, 20, 21, 24, 25, 26], "yourself": [17, 19, 24], "ystep": 25, "yyy0": 2, "yyy1": 2, "yyyi": [2, 17], "zarr": 12, "zero": [0, 2], "zip": [19, 25], "zonal": [0, 2, 4], "zonal_statist": 7, "zone": 13, "zoom": 0, "\u03bcm": 2, "\u03c0": 2}, "titles": ["API (General)", "<no title>", "API: openeo.processes", "Authentication and Account Management", "Getting Started", "Batch Jobs", "Best practices, coding style and general tips", "Changelog", "Configuration", "Analysis Ready Data generation", "openEO CookBook", "Multi Backend Job Manager", "Client-side (local) processing", "Dataset sampling", "Spectral Indices", "Miscellaneous tips and tricks", "Sharing of user-defined processes", "Finding and loading data", "DataCube construction", "Development and maintenance", "openEO Python Client", "Installation", "Machine Learning", "openEO Process Mapping", "Working with processes", "User-Defined Functions (UDF) explained", "User-Defined Processes (UDP)"], "titleterms": {"": 25, "0": [7, 25], "01": 7, "02": 7, "03": 7, "04": 7, "05": 7, "06": 7, "07": 7, "08": 7, "09": 7, "1": 7, "10": 7, "11": 7, "12": 7, "13": [7, 25], "14": 7, "15": 7, "16": 7, "17": 7, "18": 7, "19": 7, "2": 7, "20": 7, "2020": 7, "2021": 7, "2022": 7, "2023": 7, "2024": 7, "21": 7, "22": 7, "23": 7, "24": 7, "25": 7, "26": 7, "27": 7, "28": 7, "29": 7, "30": 7, "31": 7, "32": 7, "33": 7, "34": 7, "35": 7, "36": 7, "4": 7, "5": 7, "6": 7, "7": 7, "8": 7, "9": 7, "A": [24, 25], "The": 18, "account": 3, "ad": [7, 24, 25], "addit": 21, "advanc": [24, 26], 
"aggreg": 4, "all": 5, "altern": 19, "an": [4, 17, 25], "analysi": 9, "api": [0, 2, 11, 14, 24, 25], "appli": [4, 25], "applic": [3, 25], "apply_dimens": 25, "apply_neighborhood": 25, "argument": 24, "asset": 5, "asynchron": 4, "atmospher": 9, "auth": 3, "authent": [3, 4], "auto": 3, "automat": [5, 14], "back": [3, 4, 9], "backend": 11, "background": [6, 12], "backscatt": 9, "band": [4, 14], "base": [3, 11, 16, 22], "basic": [3, 11, 19, 21, 24], "batch": [4, 5], "best": [3, 6], "bit": 24, "build": [0, 18, 19, 26], "call": 24, "callabl": 24, "callback": [24, 25], "case": 4, "caveat": 24, "chang": [7, 25], "changelog": 7, "check": 19, "child": 24, "class": 2, "classif": 22, "clear": 3, "client": [3, 12, 20], "close": 17, "cloud": 4, "code": [3, 6, 19, 26], "collect": [4, 12, 17], "commit": 19, "common": 24, "comput": 4, "conda": 21, "config": 3, "configur": 8, "connect": [0, 3, 4], "constraint": 25, "construct": 18, "content": [10, 20], "context": 3, "contribut": 19, "conveni": 24, "convers": 0, "cookbook": 10, "correct": 9, "creat": [5, 19], "creation": 11, "credenti": 3, "cube": [4, 17, 24, 25, 26], "data": [4, 9, 17, 24, 26], "datacub": [0, 16, 18, 25], "dataset": 13, "date": 17, "declar": [25, 26], "default": 3, "defin": [16, 24, 25, 26], "depend": [21, 25], "deprec": 7, "develop": [19, 21], "devic": 3, "dictionari": 26, "directli": [5, 15], "discoveri": [4, 17], "do": 11, "document": 19, "down": 17, "download": [4, 5, 25], "dynam": 3, "easi": 19, "enabl": 21, "end": [3, 4, 9, 17], "environ": 3, "eodc": 9, "evalu": 26, "evi": [4, 26], "exampl": [4, 11, 18, 20, 25, 26], "exclud": 17, "execut": [4, 15, 25], "explain": 25, "explor": 17, "export": 15, "extent": 17, "featur": 21, "file": [3, 8, 19, 26], "filter": 17, "find": 17, "fine": 5, "finish": 5, "first": 25, "fix": 7, "flow": 3, "forest": 22, "format": 8, "from": [15, 17, 18, 24, 25, 26], "function": [2, 25, 26], "gener": [0, 3, 6, 9, 19, 24], "geometri": 11, "geotrelli": 9, "get": 4, "go": 5, "grain": 5, 
"graph": [0, 15, 18], "guidelin": 3, "handl": [11, 17, 25], "helper": [2, 3], "high": 0, "hoc": 25, "http": 3, "implement": 9, "import": 19, "includ": 17, "indic": [14, 20], "infer": 22, "inform": 25, "initi": [4, 17], "inspir": 6, "instal": [12, 19, 21], "integr": 5, "interact": 3, "interfac": 0, "intern": 0, "interv": 17, "item": 12, "job": [0, 4, 5, 11], "json": [15, 18], "jupyt": [5, 6], "lab": 6, "larg": 17, "learn": 22, "left": 17, "legaci": 15, "length": 6, "level": 0, "like": 19, "line": 6, "list": 5, "load": [4, 5, 16, 17], "load_collect": 18, "local": [12, 25], "locat": 8, "log": [0, 5, 25], "long": 3, "lps22": 7, "machin": 22, "mainten": 19, "manag": [3, 6, 11, 25], "manual": 14, "map": [4, 14, 23], "mask": 4, "math": 4, "metadata": [0, 25], "method": [3, 24], "miscellan": 15, "mlmodel": 0, "modul": 25, "month": 17, "more": 26, "multi": 11, "multipl": [4, 18], "multiresult": 0, "name": 25, "namespac": 16, "node": 18, "non": 3, "notat": 17, "object": 5, "oidc": 3, "one": 5, "openeo": [0, 2, 3, 4, 10, 17, 20, 23, 25], "openid": 3, "option": [3, 8, 21], "other": 24, "paramet": 26, "parameter": [11, 18, 26], "pass": 24, "perform": 13, "period": 17, "pgnode": 24, "pip": 21, "pixel": 25, "practic": [3, 6], "pre": [19, 24], "predefin": 26, "prerequisit": 19, "print": 5, "pro": 19, "procedur": 19, "process": [0, 2, 11, 12, 15, 16, 18, 23, 24, 25, 26], "processbasedjobcr": 11, "processbuild": 2, "profil": 25, "properti": 17, "public": [0, 16], "publicli": 16, "publish": 16, "pull": 19, "python": [20, 25], "qualiti": 19, "quick": 19, "random": 22, "raw": 15, "re": 18, "read_vector": 15, "readi": 9, "recommend": 6, "reconnect": 5, "reduc": 25, "reduce_dimens": 25, "refer": 9, "refresh": 3, "regress": 22, "releas": [7, 19], "remov": 7, "request": 19, "rescal": 25, "rest": 0, "result": [0, 5, 18, 24], "reus": 26, "round": 17, "run": [3, 5, 19], "sampl": 13, "sar": 9, "scalabl": 13, "scale": 13, "schema": 26, "script": 25, "section": 2, "select": 3, "server": 25, 
"set": [17, 19], "share": 16, "shorthand": 17, "side": [12, 25], "signatur": 25, "singl": [5, 17], "smooth": 25, "some": 18, "sourc": 21, "spatial": 17, "spectral": 14, "srr3": 7, "srr5": 7, "srr6": 7, "stac": 12, "standard": 25, "start": [4, 5, 17], "statist": 4, "store": 26, "string": [17, 24], "style": 6, "synchron": 4, "tabl": 20, "tempor": 17, "terminologi": 24, "test": [0, 19], "through": [16, 26], "timeseri": [4, 25, 26], "tip": [3, 6, 15], "token": 3, "tool": 3, "train": 22, "transform": 25, "trick": [6, 15], "troubleshoot": [3, 21], "tweak": 24, "udf": [0, 7, 25], "udf_signatur": 25, "udp": [0, 16, 26], "unit": 19, "unreleas": 7, "up": 19, "updat": 19, "url": [3, 16], "us": [3, 4, 16, 24, 26], "usag": [12, 15, 19, 20, 25], "user": [16, 25, 26], "userfil": 0, "util": 0, "ux": 7, "valu": 25, "variabl": 3, "vector": 17, "vectorcub": 0, "verif": [19, 25], "verifi": 21, "version": 25, "view": 25, "wait": 5, "window": 19, "work": 24, "workflow": 25, "year": 17, "your": 5}}) \ No newline at end of file diff --git a/udf.html b/udf.html new file mode 100644 index 000000000..78341a03c --- /dev/null +++ b/udf.html @@ -0,0 +1,917 @@ + + + + + + + + User-Defined Functions (UDF) explained — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

User-Defined Functions (UDF) explained

+

While openEO supports a wide range of pre-defined processes +and allows you to build more complex user-defined processes from them, +you sometimes need operations or algorithms that are +not (yet) available or standardized as an openEO process. +User-Defined Functions (UDF) is an openEO feature +(through the run_udf process) +that aims to fill that gap by allowing a user to express (a part of) +an algorithm as a Python/R/… script to be run back-end side.

+

There are a lot of details to cover, +but here is a rudimentary example snippet +to give you a quick impression of how to work with UDFs +using the openEO Python Client library:

+
+
Basic UDF usage example snippet to rescale pixel values
+
import openeo
+
+# Build a UDF object from an inline string with Python source code.
+udf = openeo.UDF("""
+import xarray
+
+def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray:
+    cube.values = 0.0001 * cube.values
+    return cube
+""")
+
+# Or load the UDF code from a separate file.
+# udf = openeo.UDF.from_file("udf-code.py")
+
+# Apply the UDF to a cube.
+rescaled_cube = cube.apply(process=udf)
+
+
+
+

Ideally, it allows you to embed existing Python/R/… implementations +in an openEO workflow (with some necessary “glue code”). +However, it is recommended to try to do as much pre- or postprocessing +with pre-defined processes +before blindly copy-pasting source code snippets as UDFs. +Pre-defined processes are typically well-optimized by the backend, +while UDFs can come with a performance penalty +and higher development/debug/maintenance costs.

+
+

Warning

+

Do not confuse user-defined functions (abbreviated as UDF) with +user-defined processes (sometimes abbreviated as UDP) in openEO, +which are a way to define and use your own process graphs +as reusable building blocks. +See User-Defined Processes (UDP) for more information.

+
+
+

Applicability and Constraints

+

openEO is designed to work transparently on large data sets +and your UDF has to follow a couple of guidelines to make that possible. +First of all, as data cubes play a central role in openEO, +your UDF should accept and return correct data cube structures, +with proper dimensions, dimension labels, etc. +Moreover, the back-end will typically divide your input data cube +in smaller chunks and process these chunks separately (e.g. on isolated workers). +Consequently, it’s important that your UDF algorithm operates correctly +in such a chunked processing context.

+

A very common mistake is to use index-based array indexing, rather than name based. The index based approach +assumes that datacube dimension order is fixed, which is not guaranteed. Next to that, it also reduces the readability +of your code. Label based indexing is a great feature of xarray, and should be used whenever possible.

+

As a rule of thumb, the UDF should preserve the dimensions and shape of the input +data cube. The datacube chunk that is passed on by the backend does not have a fixed +specification, so the UDF needs to be able to accommodate different shapes and sizes of the data.

+

There are important exceptions to this rule that depend on the context in which the UDF is used. +For instance, a UDF used as a reducer should effectively remove the reduced dimension from the +output chunk. These details are documented in the next sections.

+
+

UDFs as apply/reduce “callbacks”

+

UDFs are typically used as “callback” processes for “meta” processes +like apply or reduce_dimension (also see Processes with child “callbacks”). +These meta-processes make abstraction of a datacube as a whole +and allow the callback to focus on a small slice of data or a single dimension. +Their nature instructs the backend how the data should be processed +and can be chunked:

+
+
apply

Applies a process on each pixel separately. +The back-end has all freedom to choose chunking +(e.g. chunk spatially and temporally). +Dimensions and their labels are fully preserved. +This function has limited practical use in combination with UDF’s.

+
+
apply_dimension

Applies a process to all pixels along a given dimension +to produce a new series of values for that dimension. +The back-end will not split your data on that dimension. +For example, when working along the time dimension, +your UDF is guaranteed to receive a full timeseries, +but the data could be chunked spatially. +All dimensions and labels are preserved, +except for the dimension along which apply_dimension is applied: +the number of dimension labels is allowed to change.

+
+
reduce_dimension

Applies a process to all pixels along a given dimension +to produce a single value, eliminating that dimension. +Like with apply_dimension, the back-end will +not split your data on that dimension. +The dimension along which reduce_dimension is applied must be removed +from the output. +For example, when applying reduce_dimension on a spatiotemporal cube +along the time dimension, +the UDF is guaranteed to receive full timeseries +(but the data could be chunked spatially) +and the output cube should only be a spatial cube, without a temporal dimension.

+
+
apply_neighborhood

Applies a process to a neighborhood of pixels +in a sliding-window fashion with (optional) overlap. +Data chunking in this case is explicitly controlled by the user. +Dimensions and number of labels are fully preserved. This is the most versatile +and widely used function to work with UDF’s.

+
+
+
+
+
+

UDF function names and signatures

+

The UDF code you pass to the back-end is basically a Python script +that contains one or more functions. +Exactly one of these functions should have a proper UDF signature, +as defined in the openeo.udf.udf_signatures module, +so that the back-end knows what the entrypoint function is +of your UDF implementation.

+
+

Module openeo.udf.udf_signatures

+

This module defines a number of function signatures that can be implemented by UDF’s. +Both the name of the function and the argument types are/can be used by the backend to validate if the provided UDF +is compatible with the calling context of the process graph in which it is used.

+
+
+openeo.udf.udf_signatures.apply_datacube(cube, context)[source]
+

Map a XarrayDataCube to another XarrayDataCube.

+

Depending on the context in which this function is used, the XarrayDataCube dimensions +have to be retained or can be changed. +For instance, in the context of a reducing operation along a dimension, +that dimension will have to be reduced to a single value. +In the context of a 1 to 1 mapping operation, all dimensions have to be retained.

+
+
Parameters:
+
    +
  • cube (XarrayDataCube) – input data cube

  • +
  • context (dict) – A dictionary containing user context.

  • +
+
+
Return type:
+

XarrayDataCube

+
+
Returns:
+

output data cube

+
+
+
+ +
+
+openeo.udf.udf_signatures.apply_metadata(metadata, context)[source]
+
+

Warning

+

This signature is not yet fully standardized and subject to change.

+
+

Returns the expected cube metadata, after applying this UDF, based on input metadata. +The provided metadata represents the whole raster or vector cube. This function does not need to be called for every data chunk.

+

When this function is not implemented by the UDF, the backend may still be able to infer correct metadata by running the +UDF, but this can result in reduced performance or errors.

+

This function does not need to be provided when using the UDF in combination with processes that by design have a clear +effect on cube metadata, such as reduce_dimension()

+
+
Parameters:
+
    +
  • metadata (CollectionMetadata) – the collection metadata of the input data cube

  • +
  • context (dict) – A dictionary containing user context.

  • +
+
+
Return type:
+

CollectionMetadata

+
+
Returns:
+

output metadata: the expected metadata of the cube, after applying the udf

+
+
+
+

Examples

+

An example for a UDF that is applied on the ‘bands’ dimension, and returns a new set of bands with different labels.

+
>>> def apply_metadata(metadata: CollectionMetadata, context: dict) -> CollectionMetadata:
+...     return metadata.rename_labels(
+...         dimension="bands",
+...         target=["computed_band_1", "computed_band_2"]
+...     )
+
+
+
+
+ +
+
+openeo.udf.udf_signatures.apply_timeseries(series, context)[source]
+

Process a timeseries of values, without changing the time instants.

+

This can for instance be used for smoothing or gap-filling.

+
+
Parameters:
+
    +
  • series (Series) – A Pandas Series object with a date-time index.

  • +
  • context (dict) – A dictionary containing user context.

  • +
+
+
Return type:
+

Series

+
+
Returns:
+

A Pandas Series object with the same datetime index.

+
+
+
+ +
+
+openeo.udf.udf_signatures.apply_udf_data(data)[source]
+

Generic UDF function that directly manipulates a UdfData object

+
+
Parameters:
+

data (UdfData) – UdfData object to manipulate in-place

+
+
+
+ +
+
+openeo.udf.udf_signatures.apply_vectorcube(geometries, cube, context)[source]
+

Map a vector cube to another vector cube.

+
+
Parameters:
+
    +
  • geometries (geopandas.geodataframe.GeoDataFrame) – input geometries as a geopandas.GeoDataFrame. This contains the actual shapely geometries and optional properties.

  • +
  • cube (DataArray) – a data cube with dimensions (geometries, time, bands) where time and bands are optional. +The coordinates for the geometry dimension are integers and match the index of the geometries in the geometries parameter.

  • +
  • context (dict) – A dictionary containing user context.

  • +
+
+
Return type:
+

(geopandas.geodataframe.GeoDataFrame, DataArray)

+
+
Returns:
+

output geometries, output data cube

+
+
+
+ +
+
+
+

A first example: apply with an UDF to rescale pixel values

+

In most of the examples here, we will start from an initial Sentinel2 data cube like this:

+
s2_cube = connection.load_collection(
+    "SENTINEL2_L2A",
+    spatial_extent={"west": 4.00, "south": 51.04, "east": 4.10, "north": 51.1},
+    temporal_extent=["2022-03-01", "2022-03-31"],
+    bands=["B02", "B03", "B04"]
+)
+
+
+

The raw values in this initial s2_cube data cube are digital numbers +(integer values ranging from 0 to several thousands) +and to get physical reflectance values (float values, typically in the range between 0 and 0.5), +we have to rescale them. +This is a simple local transformation, without any interaction between pixels, +which is the modus operandi of the apply processes.

+
+

Note

+

In practice it will be a lot easier and more efficient to do this kind of rescaling +with pre-defined openEO math processes, for example: s2_cube.apply(lambda x: 0.0001 * x). +This is just a very simple illustration to get started with UDFs. In fact, it’s very likely that +you will never want to use a UDF with apply.

+
+
+

UDF script

+

The UDF code is this short script (the part that does the actual value rescaling is highlighted):

+
+
udf-code.py
+
1import xarray
+2
+3def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray:
+4    cube.values = 0.0001 * cube.values
+5    return cube
+
+
+
+

Some details about this UDF script:

+
    +
  • line 1: We import xarray as we use this as exchange format.

  • +
  • line 3: We define a function named apply_datacube, +which receives and returns a DataArray instance. +We follow here the apply_datacube() UDF function signature.

  • +
  • line 4: Because our scaling operation is so simple, we can transform the xarray.DataArray values in-place.

  • +
  • line 5: Consequently, because the values were updated in-place, we can return the same Xarray object.

  • +
+
+
+

Workflow script

+

In this first example, we’ll cite a full, standalone openEO workflow script, +including creating the back-end connection, loading the initial data cube and downloading the result. +The UDF-specific part is highlighted.

+
+

Warning

+

This implementation depends on openeo.UDF improvements +that were introduced in version 0.13.0 of the openeo Python Client Library. +If you are currently stuck with working with an older version, +check openeo.UDF API and usage changes in version 0.13.0 for more information on the difference with the old API.

+
+
+
UDF usage example snippet
+
 1import openeo
+ 2
+ 3# Create connection to openEO back-end
+ 4connection = openeo.connect("...").authenticate_oidc()
+ 5
+ 6# Load initial data cube.
+ 7s2_cube = connection.load_collection(
+ 8    "SENTINEL2_L2A",
+ 9    spatial_extent={"west": 4.00, "south": 51.04, "east": 4.10, "north": 51.1},
+10    temporal_extent=["2022-03-01", "2022-03-31"],
+11    bands=["B02", "B03", "B04"]
+12)
+13
+14# Create a UDF object from inline source code.
+15udf = openeo.UDF("""
+16import xarray
+17
+18def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray:
+19    cube.values = 0.0001 * cube.values
+20    return cube
+21""")
+22
+23# Pass UDF object as child process to `apply`.
+24rescaled = s2_cube.apply(process=udf)
+25
+26rescaled.download("apply-udf-scaling.nc")
+
+
+
+

In line 15, we build an openeo.UDF object +from an inline string with the UDF source code. +This openeo.UDF object encapsulates various aspects +that are necessary to create a run_udf node in the process graph, +and we can pass it directly in line 24 as the process argument +to DataCube.apply().

+
+

Tip

+

Instead of putting your UDF code in an inline string like in the example, +it’s often a good idea to load the UDF code from a separate file, +which is easier to maintain in your preferred editor or IDE. +You can do that directly with the +openeo.UDF.from_file method:

+
udf = openeo.UDF.from_file("udf-code.py")
+
+
+
+

After downloading the result, we can inspect the band values locally. +Note that they fall mainly in a range from 0 to 1 (in most cases even below 0.2), +instead of the original digital number range (thousands):

+_images/apply-rescaled-histogram.png +
+
+
+

UDF’s that transform cube metadata

+

This is a new/experimental feature so may still be subject to change.

+

In some cases, a UDF can have impact on the metadata of a cube, but this can not always +be easily inferred by process graph evaluation logic without running the actual +(expensive) UDF code. This limits the possibilities to validate process graphs, +or for instance make an estimate of the size of a datacube after applying a UDF.

+

To provide evaluation logic with this information, the user should implement the +apply_metadata() function as part of the UDF. +Please refer to the documentation of that function for more information.

+
+
Example of a UDF that adjusts spatial metadata udf_modify_spatial.py
+
import xarray
+from openeo.udf import XarrayDataCube
+from openeo.udf.debug import inspect
+from openeo.metadata import CollectionMetadata
+import numpy as np
+
+def apply_metadata(input_metadata:CollectionMetadata, context:dict) -> CollectionMetadata:
+
+    xstep = input_metadata.get('x','step')
+    ystep = input_metadata.get('y','step')
+    new_metadata = {
+          "x": {"type": "spatial", "axis": "x", "step": xstep/2.0, "reference_system": 4326},
+          "y": {"type": "spatial", "axis": "y", "step": ystep/2.0, "reference_system": 4326},
+          "t": {"type": "temporal"}
+    }
+    return CollectionMetadata(new_metadata)
+
+def fancy_upsample_function(array: np.array, factor: int = 2) -> np.array:
+    assert array.ndim == 3
+    return array.repeat(factor, axis=-1).repeat(factor, axis=-2)
+
+def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
+    array: xarray.DataArray = cube.get_array()
+
+    cubearray: xarray.DataArray = cube.get_array().copy() + 60
+
+    # We make prediction and transform numpy array back to datacube
+
+    # Pixel size of the original image
+    init_pixel_size_x = cubearray.coords['x'][-1] - cubearray.coords['x'][-2]
+    init_pixel_size_y = cubearray.coords['y'][-1] - cubearray.coords['y'][-2]
+
+    if cubearray.data.ndim == 4 and cubearray.data.shape[0] == 1:
+        cubearray = cubearray[0]
+    predicted_array = fancy_upsample_function(cubearray.data, 2)
+    inspect(predicted_array, "test message")
+    coord_x = np.linspace(start=cube.get_array().coords['x'].min(), stop=cube.get_array().coords['x'].max() + init_pixel_size_x,
+                          num=predicted_array.shape[-2], endpoint=False)
+    coord_y = np.linspace(start=cube.get_array().coords['y'].min(), stop=cube.get_array().coords['y'].max() + init_pixel_size_y,
+                          num=predicted_array.shape[-1], endpoint=False)
+    predicted_cube = xarray.DataArray(predicted_array, dims=['bands', 'x', 'y'], coords=dict(x=coord_x, y=coord_y))
+
+
+    return XarrayDataCube(predicted_cube)
+
+
+
+

To invoke a UDF like this, the apply_neighborhood method is most suitable:

+
udf_code = Path('udf_modify_spatial.py').read_text()
+cube_updated = cube.apply_neighborhood(
+    lambda data: data.run_udf(udf=udf_code, runtime='Python-Jep', context=dict()),
+    size=[
+        {'dimension': 'x', 'value': 128, 'unit': 'px'},
+        {'dimension': 'y', 'value': 128, 'unit': 'px'}
+    ], overlap=[])
+
+
+
+
+

Example: apply_dimension with a UDF

+

This is useful when running custom code over all band values for a given pixel or all observations per pixel. +See section below ‘Smoothing timeseries with a user defined function’ for a concrete example.

+
+
+

Example: reduce_dimension with a UDF

+

The key element for a UDF invoked in the context of reduce_dimension is that it should actually return +an Xarray DataArray _without_ the dimension that is specified to be reduced.

+

So a reduce over time would receive a DataArray with bands,t,y,x dimensions, and return one with only bands,y,x.

+
+
+

Example: apply_neighborhood with a UDF

+

The apply_neighborhood process is generally used when working with complex AI models that require a +spatiotemporal input stack with a fixed size. It supports the ability to specify overlap, to ensure that the model +has sufficient border information to generate a spatially coherent output across chunks of the raster data cube.

+

In the example below, the UDF will receive chunks of 128x128 pixels: 112 is the chunk size, while 2 times 8 pixels of +overlap on each side of the chunk results in 128.

+

The time and band dimensions are not specified, which means that all values along these dimensions are passed into +the datacube.

+
output_cube = inputs_cube.apply_neighborhood(my_udf, size=[
+        {'dimension': 'x', 'value': 112, 'unit': 'px'},
+        {'dimension': 'y', 'value': 112, 'unit': 'px'}
+    ], overlap=[
+        {'dimension': 'x', 'value': 8, 'unit': 'px'},
+        {'dimension': 'y', 'value': 8, 'unit': 'px'}
+    ])
+
+
+

The apply_neighborhood is the most versatile, but also most complex process. Make sure to keep an eye on the dimensions +and the shape of the DataArray returned by your UDF. For instance, a very common error is to somehow ‘flip’ the spatial dimensions. +Debugging the UDF locally can help, but then you will want to try and reproduce the input that you get also on the backend. +This can typically be achieved by using logging to inspect the DataArrays passed into your UDF backend side.

+
+
+

Example: Smoothing timeseries with a user defined function (UDF)

+

In this example, we start from the evi_cube that was created in the previous example, and want to +apply a temporal smoothing on it. More specifically, we want to use the “Savitzky Golay” smoother +that is available in the SciPy Python library.

+

To ensure that openEO understands your function, it needs to follow some rules, the UDF specification. +This is an example that follows those rules:

+
+
Example UDF code smooth_savitzky_golay.py
+
import xarray
+from scipy.signal import savgol_filter
+
+from openeo.udf import XarrayDataCube
+
+
+def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
+    """
+    Apply Savitzky-Golay smoothing to a timeseries datacube.
+    This UDF preserves dimensionality, and assumes an input
+    datacube with a temporal dimension 't' as input.
+    """
+    array: xarray.DataArray = cube.get_array()
+    filled = array.interpolate_na(dim='t')
+    smoothed_array = savgol_filter(filled.values, 5, 2, axis=0)
+    return XarrayDataCube(
+        array=xarray.DataArray(smoothed_array, dims=array.dims, coords=array.coords)
+    )
+
+
+
+

The method signature of the UDF is very important, because the back-end will use it to detect +the type of UDF. +This particular example accepts an XarrayDataCube object as input and also returns an XarrayDataCube object. +The type annotations and method name are actually used to detect how to invoke the UDF, so make sure they remain unchanged.

+

Once the UDF is defined in a separate file, we load it +and apply it along a dimension:

+
smoothing_udf = openeo.UDF.from_file('smooth_savitzky_golay.py')
+smoothed_evi = evi_cube_masked.apply_dimension(smoothing_udf, dimension="t")
+
+
+
+
+

Downloading a datacube and executing an UDF locally

+

Sometimes it is advantageous to run a UDF on the client machine (for example when developing/testing that UDF). +This is possible by using the convenience function openeo.udf.run_code.execute_local_udf(). +The steps to run a UDF (like the code from smooth_savitzky_golay.py above) are as follows:

+ +

For example:

+
from pathlib import Path
+from openeo.udf import execute_local_udf
+
+my_process = connection.load_collection(...
+
+my_process.download('test_input.nc', format='NetCDF')
+
+smoothing_udf = Path('smooth_savitzky_golay.py').read_text()
+execute_local_udf(smoothing_udf, 'test_input.nc', fmt='netcdf')
+
+
+

Note: this algorithm’s primary purpose is to aid client side development of UDFs using small datasets. It is not designed for large jobs.

+
+
+

UDF dependency management

+

UDFs usually have some dependencies on existing libraries, e.g. to implement complex algorithms. +In case of Python UDFs, it can be assumed that common libraries like numpy and Xarray are readily available, +not least because they underpin the Python UDF function signatures. +More concretely, it is possible to inspect available libraries for the available UDF runtimes +through Connection.list_udf_runtimes(). +For example, to list the available libraries for runtime “Python” (version “3”):

+
>>> connection.list_udf_runtimes()["Python"]["versions"]["3"]["libraries"]
+{'geopandas': {'version': '0.13.2'},
+ 'numpy': {'version': '1.22.4'},
+ 'xarray': {'version': '0.16.2'},
+ ...
+
+
+

Managing and using additional dependencies or libraries that are not provided out-of-the-box by a backend +is a more challenging problem and the practical details can vary between backends.

+
+

Standard for declaring Python UDF dependencies

+
+

Warning

+

This is based on a fairly recent standard and it might not be supported by your chosen backend yet.

+
+

PEP 723 “Inline script metadata” defines a standard +for Python scripts to declare dependencies inside a top-level comment block. +If the openEO backend of your choice supports this standard, it is the preferred approach +to declare the (import) dependencies of your Python UDF:

+
    +
  • It avoids all the overhead for the UDF developer +to correctly and efficiently make desired dependencies available in the UDF.

  • +
  • It allows the openEO backend to optimize dependencies handling.

  • +
+
+

Warning

+

An openEO backend might only support this automatic UDF dependency handling feature +in batch jobs (because of their isolated nature), +but not for synchronous processing requests.

+
+
+

Declaration of UDF dependencies

+

A basic example of how the UDF dependencies can be declared in top-level comment block of your Python UDF:

+
# /// script
+# dependencies = [
+#   "geojson",
+#   "fancy-eo-library",
+# ]
+# ///
+#
+# This openEO UDF script implements ...
+# based on the fancy-eo-library ... using geojson data ...
+
+import geojson
+import fancyeo
+
+def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray:
+    ...
+
+
+

Some considerations to make sure you have a valid metadata block:

+
    +
  • Lines start with a single hash # and one space (the space can be omitted if the # is the only character on the line).

  • +
  • The metadata block starts with a line # /// script and ends with # ///.

  • +
  • Between these delimiters you put the metadata fields in TOML format, +each line prefixed with # and a space.

  • +
  • Declare your UDF’s dependencies in a dependencies field as a TOML array. +List each package on a separate line as shown above, or put them all on a single line. +It is also allowed to include comments, as long as the whole construct is valid TOML.

  • +
  • Each dependencies entry must be a valid PEP 508 dependency specifier. +This practically means to use the package names (optionally with version constraints) +as expected by the pip install command.

  • +
+

A more complex example to illustrate some more advanced aspects of the metadata block:

+
# /// script
+# dependencies = [
+#   # A comment about using at least version 2.5.0
+#   'geojson>=2.5.0',  # An inline comment
+#   # Note that TOML allows both single and double quotes for strings.
+#
+#   # Install a package "fancyeo" from a (ZIP) source archive URL.
+#   "fancyeo @ https://github.com/fncy/fancyeo/archive/refs/tags/v3.2.0-alpha1.zip",
+#   # Or from a wheel URL, including a content hash to be verified before installing.
+#   "lousyeo @ https://example.com/lousyeo-6.6.6-py3-none-any.whl#sha1=4bbb3c72a9234ee998a6de940a148e346a",
+#   # Note that the last entry may have a trailing comma.
+# ]
+# ///
+
+
+
+
+

Verification

+

Use extract_udf_dependencies() to verify +that your metadata block can be parsed correctly:

+
>>> from openeo.udf.run_code import extract_udf_dependencies
+>>> extract_udf_dependencies(udf_code)
+['geojson>=2.5.0',
+ 'fancyeo @ https://github.com/fncy/fancyeo/archive/refs/tags/v3.2.0-alpha1.zip',
+ 'lousyeo @ https://example.com/lousyeo-6.6.6-py3-none-any.whl#sha1=4bbb3c72a9234ee998a6de940a148e346a']
+
+
+

If no valid metadata block is found, None will be returned.

+
+

Note

+

This function won’t necessarily raise exceptions for syntax errors in the metadata block. +It might just fail to reliably detect anything and skip it as regular comment lines.

+
+
+
+
+

Ad-hoc dependency handling

+

If dependency handling through standardized UDF declarations is not supported by the backend, +there are still ways to manually handle additional dependencies in your UDF. +The exact details can vary between backends, but we can give some general pointers here:

+
    +
  • Multiple Python dependencies can be packaged fairly easily by zipping a Python virtual environment.

  • +
  • For some dependencies, it can be important that the Python major version of the virtual environment is the same as the one used by the backend.

  • +
  • Python allows you to dynamically append (or prepend) libraries to the search path: sys.path.append("unzipped_virtualenv_location")

  • +
+
+
+
+

Profile a process server-side

+
+

Warning

+

Experimental feature - This feature only works on back-ends running the Geotrellis implementation, and has not yet been +adopted in the openEO API.

+
+

Sometimes users want to ‘profile’ their UDF on the back-end. While it’s recommended to first profile it offline, in the +same manner as you can debug UDFs, back-ends may support profiling directly. +Note that this will only generate statistics over the Python part of the execution, therefore it is only suitable for profiling UDFs.

+
+

Usage

+

Only batch jobs are supported! In order to turn on profiling, set ‘profile’ to ‘true’ in job options:

+
job_options={'profile':'true'}
+... # prepare the process
+process.execute_batch('result.tif',job_options=job_options)
+
+
+

When the process has finished, it will also download a file called ‘profile_dumps.tar.gz’:

+
    +
  • rdd_-1.pstats is the profile data of the python driver,

  • +
  • the rest are the profiling results of the individual rdd id-s (that can be correlated with the execution using the SPARK UI).

  • +
+
+
+

Viewing profiling information

+

The simplest way is to visualize the results with a graphical visualization tool called kcachegrind. +In order to do that, install the kcachegrind packages (most Linux distributions have it installed by default) and its Python connector pyprof2calltree. +From the command line, run:

+
pyprof2calltree rdd_<INTERESTING_RDD_ID>.pstats.
+
+
+

Another way is to use the builtin pstats functionality from within python:

+
import pstats
+p = pstats.Stats('restats')
+p.print_stats()
+
+
+
+
+

Example

+

An example code can be found here .

+
+
+
+

Logging from a UDF

+

From time to time, when things are not working as expected, +you may want to log some additional debug information from your UDF, inspect the data that is being processed, +or log warnings. +This can be done using the inspect() function.

+

For example: to discover the shape of the data cube chunk that you receive in your UDF function:

+
+
Sample UDF code with inspect() logging
+
from openeo.udf import inspect
+import xarray
+
+def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray:
+    inspect(data=[cube.shape], message="UDF logging shape of my cube")
+    cube.values = 0.0001 * cube.values
+    return cube
+
+
+
+

After the batch job is finished (or failed), you can find this information in the logs of the batch job. +For example (as explained at Batch job logs), +use BatchJob.logs() in a Jupyter notebook session +to retrieve and filter the logs interactively:

+_images/logging_arrayshape.png +

Which reveals in this example a chunking shape of [3, 256, 256].

+
+

Note

+

Not all kinds of data (types) are accepted/supported by the data argument of inspect, +so you might have to experiment a bit to make sure the desired debug information is logged as desired.

+
+
+
+

openeo.UDF API and usage changes in version 0.13.0

+

Prior to version 0.13.0 of the openEO Python Client Library, +loading and working with UDFs was a bit inconsistent and cumbersome.

+
    +
  • The old openeo.UDF() required an explicit runtime argument, which was usually "Python". +In the new openeo.UDF, the runtime argument is optional, +and it will be auto-detected (from the source code or file extension) when not given.

  • +
  • The old openeo.UDF() required an explicit data argument, and figuring out the correct +value (e.g. something like {"from_parameter": "x"}) required good knowledge of the openEO API and processes. +With the new openeo.UDF it is not necessary anymore to provide +the data argument. In fact, while the data argument is only still there for compatibility reasons, +it is unused and it will be removed in a future version. +A deprecation warning will be triggered when data is given a value.

  • +
  • DataCube.apply_dimension() has direct UDF support through +code and runtime arguments, preceding the more generic and standard process argument, while +comparable methods like DataCube.apply() +or DataCube.reduce_dimension() +only support a process argument with no dedicated arguments for UDFs.

    +

    The goal is to improve uniformity across all these methods and use a generic process argument everywhere +(that also supports an openeo.UDF object for UDF use cases). +For now, the code, runtime and version arguments are still present +in DataCube.apply_dimension() +as before, but usage is deprecated.

    +

    Simple example to sum it up:

    +
    udf_code = """
    +...
    +def apply_datacube(cube, ...
    +"""
    +
    +# Legacy `apply_dimension` usage: still works for now,
    +# but it will trigger a deprecation warning.
    +cube.apply_dimension(code=udf_code, runtime="Python", dimension="t")
    +
    +# New, preferred approach with a standard `process` argument.
    +udf = openeo.UDF(udf_code)
    +cube.apply_dimension(process=udf, dimension="t")
    +
    +# Unchanged: usage of other apply/reduce/... methods
    +cube.apply(process=udf)
    +cube.reduce_dimension(reducer=udf, dimension="t")
    +
    +
    +
  • +
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/udp.html b/udp.html new file mode 100644 index 000000000..3863264de --- /dev/null +++ b/udp.html @@ -0,0 +1,608 @@ + + + + + + + + User-Defined Processes (UDP) — openEO Python Client 0.36.0 documentation + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

User-Defined Processes (UDP)

+
+

Code reuse with user-defined processes

+

As explained before, processes can be chained together in a process graph +to build a certain algorithm. +Often, you have certain (sub)chains that reoccur in the same process graph +or even in different process graphs or algorithms.

+

The openEO API enables you to store such (sub)chains +on the back-end as a so called user-defined process. +This allows you to build your own library of reusable building blocks.

+
+

Warning

+

Do not confuse user-defined processes (sometimes abbreviated as UDP) with +user-defined functions (UDF) in openEO, which is a mechanism to +inject Python or R scripts as process nodes in a process graph. +See User-Defined Functions (UDF) explained for more information.

+
+

A user-defined process can not only be constructed from +pre-defined processes provided by the back-end, +but also from other user-defined processes.

+

Ultimately, the openEO API allows you to publicly expose your user-defined process, +so that other users can invoke it as a service. +This turns your openEO process into a web application +that can be executed using the regular openEO +support for synchronous and asynchronous jobs.

+
+
+

Process Parameters

+

User-defined processes are usually parameterized, +meaning certain inputs are expected when calling the process.

+

For example, if you often have to convert Fahrenheit to Celsius:

+
c = (f - 32) / 1.8
+
+
+

you could define a user-defined process fahrenheit_to_celsius, +consisting of two simple mathematical operations +(pre-defined processes subtract and divide).

+

We can represent this in openEO’s JSON based format as follows +(don’t worry too much about the syntax details of this representation, +the openEO Python client will hide this usually):

+
{
+    "subtract32": {
+        "process_id": "subtract",
+        "arguments": {"x": {"from_parameter": "fahrenheit"}, "y": 32}
+    },
+    "divide18": {
+        "process_id": "divide",
+        "arguments": {"x": {"from_node": "subtract32"}, "y": 1.8},
+        "result": true
+    }
+}
+
+
+

The important point here is the parameter reference {"from_parameter": "fahrenheit"} in the subtraction. +When we call this user-defined process we will have to provide a Fahrenheit value. +For example with 70 degrees Fahrenheit (again in openEO JSON format here):

+
{
+    "process_id": "fahrenheit_to_celsius",
+    "arguments": {"fahrenheit": 70}
+}
+
+
+
+

Declaring Parameters

+

It’s good style to declare what parameters your user-defined process expects and supports. +It allows you to document your parameters, define the data type(s) you expect +(the “schema” in openEO-speak) and define default values.

+

The openEO Python client lets you define parameters as +Parameter instances. +In general you have to specify at least the parameter name, +a description and a schema (to declare the expected parameter type). +The “fahrenheit” parameter from the example above can be defined like this:

+
from openeo.api.process import Parameter
+
+fahrenheit_param = Parameter(
+    name="fahrenheit",
+    description="Degrees Fahrenheit",
+    schema={"type": "number"}
+)
+
+
+

To simplify working with parameter schemas, the Parameter class +provides a couple of helpers to create common types of parameters. +In the example above, the “fahrenheit” parameter (a number) can also be created more compactly +with the Parameter.number() helper:

+
fahrenheit_param = Parameter.number(
+    name="fahrenheit", description="Degrees Fahrenheit"
+)
+
+
+

Some useful parameter helpers (class methods of the Parameter class):

+ +

Consult the documentation of these helper class methods for additional features. +For example, declaring a default value for an integer parameter:

+
size_param = Parameter.integer(
+    name="size", description="Kernel size", default=4
+)
+
+
+
+
+

More advanced parameter schemas

+

While the helper class methods of Parameter (discussed above) +cover the most common parameter usage, +you also might need to declare some parameters with a more special or specific schema. +You can do that through the schema argument +of the basic Parameter() constructor. +This “schema” argument follows the JSON Schema draft-07 specification, +which we will briefly illustrate here.

+

Basic primitives can be declared through a (required) “type” field, for example: +{"type": "string"} for strings, {"type": "integer"} for integers, etc.

+

Likewise, arrays can be defined with a minimal {"type": "array"}. +In addition, the expected type of the array items can also be specified, +e.g. an array of integers:

+
{
+    "type": "array",
+    "items": {"type": "integer"}
+}
+
+
+

Another, more complex type is {"type": "object"} for parameters +that are like Python dictionaries (or mappings). +For example, to define a bounding box parameter +that should contain certain fields with certain type:

+
{
+    "type": "object",
+    "properties": {
+        "west": {"type": "number"},
+        "south": {"type": "number"},
+        "east": {"type": "number"},
+        "north": {"type": "number"},
+        "crs": {"type": "string"}
+    }
+}
+
+
+

Check the documentation and examples of JSON Schema draft-07 +for even more features.

+

On top of these generic types, the openEO API also defines a couple of custom (sub)types +in the openeo-processes project +(see the meta/subtype-schemas.json listing). +For example, the schema of an openEO data cube is:

+
{
+    "type": "object",
+    "subtype": "datacube"
+}
+
+
+
+
+
+

Building and storing user-defined process

+

There are a couple of ways to build and store user-defined processes:

+ +
+

Through “process functions”

+

The openEO Python Client Library defines the +official processes in the openeo.processes module, +which can be used to build a process graph as follows:

+
from openeo.processes import subtract, divide
+from openeo.api.process import Parameter
+
+# Define the input parameter.
+f = Parameter.number("f", description="Degrees Fahrenheit.")
+
+# Do the calculations, using the parameter and other values
+fahrenheit_to_celsius = divide(x=subtract(x=f, y=32), y=1.8)
+
+# Store user-defined process in openEO back-end.
+connection.save_user_defined_process(
+    "fahrenheit_to_celsius",
+    fahrenheit_to_celsius,
+    parameters=[f]
+)
+
+
+

The fahrenheit_to_celsius object encapsulates the subtract and divide calculations in a symbolic way. +We can pass it directly to save_user_defined_process().

+

If you want to inspect its openEO-style process graph representation, +use the to_json() +or print_json() method:

+
>>> fahrenheit_to_celsius.print_json()
+{
+  "process_graph": {
+    "subtract1": {
+      "process_id": "subtract",
+      "arguments": {
+        "x": {
+          "from_parameter": "f"
+        },
+        "y": 32
+      }
+    },
+    "divide1": {
+      "process_id": "divide",
+      "arguments": {
+        "x": {
+          "from_node": "subtract1"
+        },
+        "y": 1.8
+      },
+      "result": true
+    }
+  }
+}
+
+
+
+
+

From a parameterized data cube

+

It’s also possible to work with a DataCube directly +and parameterize it. +Let’s create, as a simple but functional example, a custom load_collection +with hardcoded collection id and band name +and a parameterized spatial extent (with default):

+
spatial_extent = Parameter(
+    name="bbox",
+    schema="object",
+    default={"west": 3.7, "south": 51.03, "east": 3.75, "north": 51.05}
+)
+
+cube = connection.load_collection(
+    "SENTINEL2_L2A_SENTINELHUB",
+    spatial_extent=spatial_extent,
+    bands=["B04"]
+)
+
+
+

Note how we can just pass Parameter objects as arguments +while building a DataCube.

+
+

Note

+

Not all DataCube methods/processes properly support +Parameter arguments. +Please submit a bug report when you encounter missing or wrong parameterization support.

+
+

We can now store this as a user-defined process called “fancy_load_collection” on the back-end:

+
connection.save_user_defined_process(
+    "fancy_load_collection",
+    cube,
+    parameters=[spatial_extent]
+)
+
+
+

If you want to inspect its openEO-style process graph representation, +use the to_json() +or print_json() method:

+
>>> cube.print_json()
+{
+  "loadcollection1": {
+    "process_id": "load_collection",
+    "arguments": {
+      "id": "SENTINEL2_L2A_SENTINELHUB",
+      "bands": [
+        "B04"
+      ],
+      "spatial_extent": {
+        "from_parameter": "bbox"
+      },
+      "temporal_extent": null
+    },
+    "result": true
+  }
+}
+
+
+
+
+

Using a predefined dictionary

+

In some (advanced) situations, you might already have +the process graph in dictionary format +(or JSON format, which is very close and easy to transform). +For example, another developer may have already prepared it for you, +or you may prefer to fine-tune process graphs in a JSON editor. +It is very straightforward to submit this as a user-defined process.

+

Say we start from the following Python dictionary, +representing the Fahrenheit to Celsius conversion we discussed before:

+
fahrenheit_to_celsius = {
+    "subtract1": {
+        "process_id": "subtract",
+        "arguments": {"x": {"from_parameter": "f"}, "y": 32}
+    },
+    "divide1": {
+        "process_id": "divide",
+        "arguments": {"x": {"from_node": "subtract1"}, "y": 1.8},
+        "result": True
+    }}
+
+
+

We can store this directly, taking into account that we have to define +a parameter named f corresponding with the {"from_parameter": "f"} argument +from the dictionary above:

+
connection.save_user_defined_process(
+    user_defined_process_id="fahrenheit_to_celsius",
+    process_graph=fahrenheit_to_celsius,
+    parameters=[Parameter.number(name="f", description="Degrees Fahrenheit")]
+)
+
+
+
+
+

Store to a file

+

Some use cases might require storing the user-defined process in, +for example, a JSON file instead of storing it directly on a back-end. +Use build_process_dict() to build a dictionary +compatible with the “process graph with metadata” format of the openEO API +and dump it in JSON format to a file:

+
import json
+from openeo.rest.udp import build_process_dict
+from openeo.processes import subtract, divide
+from openeo.api.process import Parameter
+
+fahrenheit = Parameter.number("f", description="Degrees Fahrenheit.")
+fahrenheit_to_celsius = divide(x=subtract(x=fahrenheit, y=32), y=1.8)
+
+spec = build_process_dict(
+    process_id="fahrenheit_to_celsius",
+    process_graph=fahrenheit_to_celsius,
+    parameters=[fahrenheit]
+)
+
+with open("fahrenheit_to_celsius.json", "w") as f:
+    json.dump(spec, f, indent=2)
+
+
+

This results in a JSON file like this:

+
{
+  "id": "fahrenheit_to_celsius",
+  "process_graph": {
+    "subtract1": {
+      "process_id": "subtract",
+       ...
+  "parameters": [
+    {
+      "name": "f",
+      ...
+
+
+
+
+
+

Evaluate user-defined processes

+

Let’s evaluate the user-defined processes we defined.

+

Because there is no pre-defined +wrapper function for our user-defined process, we use the +generic openeo.processes.process() function to build a simple +process graph that calls our fahrenheit_to_celsius process:

+
>>> pg = openeo.processes.process("fahrenheit_to_celsius", f=70)
+>>> pg.print_json(indent=None)
+{"process_graph": {"fahrenheittocelsius1": {"process_id": "fahrenheit_to_celsius", "arguments": {"f": 70}, "result": true}}}
+
+>>> res = connection.execute(pg)
+>>> print(res)
+21.11111111111111
+
+
+

To use our custom fancy_load_collection process, +we only have to specify a temporal extent, +and let the predefined and default values do their work. +We will use datacube_from_process() +to construct a DataCube object +which we can process further and download:

+
cube = connection.datacube_from_process("fancy_load_collection")
+cube = cube.filter_temporal("2020-09-01", "2020-09-10")
+cube.download("fancy.tiff", format="GTiff")
+
+
+

See Construct DataCube from process for more information on datacube_from_process().

+
+
+

UDP Example: EVI timeseries

+

In this UDP example, we’ll build a reusable UDP evi_timeseries +to calculate the EVI timeseries for a given geometry. +It’s a simplified version of the EVI workflow laid out in Example use case: EVI map and timeseries, +focussing on the UDP-specific aspects: defining and using parameters; +building, storing, and finally executing the UDP.

+
import openeo
+from openeo.api.process import Parameter
+
+# Create connection to openEO back-end
+connection = openeo.connect("...").authenticate_oidc()
+
+# Declare the UDP parameters
+temporal_extent = Parameter(
+    name="temporal_extent",
+    description="The date range to calculate the EVI for.",
+    schema={"type": "array", "subtype": "temporal-interval"},
+    default =["2018-06-15", "2018-06-27"]
+)
+geometry = Parameter(
+    name="geometry",
+    description="The geometry (a single (multi)polygon or a feature collection of (multi)polygons) to calculate the EVI for.",
+    schema={"type": "object", "subtype": "geojson"}
+)
+
+# Load raw SENTINEL2_L2A data
+sentinel2_cube = connection.load_collection(
+    "SENTINEL2_L2A",
+    temporal_extent=temporal_extent,
+    bands=["B02", "B04", "B08"],
+)
+
+# Extract spectral bands and calculate EVI with the "band math" feature
+blue = sentinel2_cube.band("B02") * 0.0001
+red = sentinel2_cube.band("B04") * 0.0001
+nir = sentinel2_cube.band("B08") * 0.0001
+evi = 2.5 * (nir - red) / (nir + 6.0 * red - 7.5 * blue + 1.0)
+
+evi_aggregation = evi.aggregate_spatial(
+    geometries=geometry,
+    reducer="mean",
+)
+
+# Store the parameterized user-defined process at openEO back-end.
+process_id = "evi_timeseries"
+connection.save_user_defined_process(
+    user_defined_process_id=process_id,
+    process_graph=evi_aggregation,
+    parameters=[temporal_extent, geometry],
+)
+
+
+

When this UDP evi_timeseries is successfully stored on the back-end, +we can use it through datacube_from_process() +to get the EVI timeseries of a desired geometry and time window:

+
time_window = ["2020-01-01", "2021-12-31"]
+geometry = {
+    "type": "Polygon",
+    "coordinates": [[[5.1793, 51.2498], [5.1787, 51.2467], [5.1852, 51.2450], [5.1867, 51.2453], [5.1873, 51.2491], [5.1793, 51.2498]]],
+  }
+
+evi_timeseries = connection.datacube_from_process(
+    process_id="evi_timeseries",
+    temporal_extent=time_window,
+    geometry=geometry,
+)
+
+evi_timeseries.download("evi-aggregation.json")
+
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file