From 9907d8c997f851734a48576ad8f8b4d54fb36cd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dion=20H=C3=A4fner?= Date: Tue, 27 Nov 2018 15:28:09 +0100 Subject: [PATCH 1/6] more docs; more docstrings; more tests --- docs/api.rst | 46 +++++++-------- docs/cli-commands/optimize-rasters.rst | 2 +- docs/cli.rst | 2 +- docs/conf.py | 12 +++- docs/drivers/mysql.rst | 7 +++ docs/drivers/sqlite-remote.rst | 8 +++ docs/drivers/sqlite.rst | 7 +++ docs/example-ingestion-script.py | 64 +++++++++++++++++++++ docs/get-started.rst | 80 ++++---------------------- docs/index.rst | 29 ++++++---- docs/settings.rst | 36 ++++++++++-- terracotta/__init__.py | 27 ++++++++- terracotta/config.py | 2 +- terracotta/drivers/__init__.py | 63 ++++++++++++++------ terracotta/drivers/mysql.py | 15 ++++- terracotta/drivers/raster_base.py | 4 +- terracotta/drivers/sqlite.py | 33 +++++++---- terracotta/drivers/sqlite_remote.py | 7 +-- terracotta/image.py | 2 + terracotta/logs.py | 2 +- terracotta/scripts/cli.py | 10 +++- terracotta/scripts/connect.py | 7 ++- terracotta/scripts/ingest.py | 8 +-- terracotta/scripts/optimize_rasters.py | 2 +- terracotta/scripts/serve.py | 2 +- terracotta/server/flask_api.py | 1 - tests/drivers/test_drivers.py | 18 ++++-- tests/drivers/test_raster_drivers.py | 8 +-- tests/handlers/test_rgb.py | 8 +++ tests/server/test_flask_api.py | 5 ++ tests/test_config.py | 13 +++++ tests/test_image.py | 16 ++++-- 32 files changed, 364 insertions(+), 182 deletions(-) create mode 100644 docs/drivers/mysql.rst create mode 100644 docs/drivers/sqlite-remote.rst create mode 100644 docs/drivers/sqlite.rst create mode 100644 docs/example-ingestion-script.py diff --git a/docs/api.rst b/docs/api.rst index 5a19716f..3cdffdb6 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,43 +1,37 @@ -.. _api: - Python API ========== -Top-level interface -------------------- - -.. autofunction:: terracotta.get_driver +Get and set runtime settings +---------------------------- .. 
autofunction:: terracotta.get_settings .. autofunction:: terracotta.update_settings -Drivers -------- +Get a driver instance +--------------------- -Common interface -++++++++++++++++ +.. autofunction:: terracotta.get_driver -.. autoclass:: terracotta.drivers.base.Driver - :members: +Driver interface +---------------- -Available drivers -+++++++++++++++++ +.. seealso:: -SQLite -~~~~~~ + The following class defines the common interface for all Terracotta + drivers. For a reference on a specific driver, refer to :ref:`available-drivers`. -.. autoclass:: terracotta.drivers.sqlite.SQLiteDriver - :members: __init__ +.. autoclass:: terracotta.drivers.base.Driver + :members: -Remote SQLite on S3 -~~~~~~~~~~~~~~~~~~~ +.. _available-drivers: -.. autoclass:: terracotta.drivers.sqlite_remote.RemoteSQLiteDriver - :members: __init__ +Available drivers +----------------- -MySQL -~~~~~ +.. toctree:: + :maxdepth: 1 -.. autoclass:: terracotta.drivers.mysql.MySQLDriver - :members: __init__ \ No newline at end of file + drivers/sqlite + drivers/sqlite-remote + drivers/mysql diff --git a/docs/cli-commands/optimize-rasters.rst b/docs/cli-commands/optimize-rasters.rst index 4568ff13..c303688d 100644 --- a/docs/cli-commands/optimize-rasters.rst +++ b/docs/cli-commands/optimize-rasters.rst @@ -1,2 +1,2 @@ .. click:: terracotta.scripts.cli:optimize_rasters - :prog: terracotta optimize-rasters \ No newline at end of file + :prog: terracotta optimize-rasters diff --git a/docs/cli.rst b/docs/cli.rst index ab03a040..f8b1b734 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -5,7 +5,7 @@ Command-line interface .. 
toctree:: :maxdepth: 1 - :caption: Available commands + :caption: Available commands: cli-commands/main cli-commands/optimize-rasters diff --git a/docs/conf.py b/docs/conf.py index b789afac..319f3a71 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,6 +18,7 @@ # -- Project information ----------------------------------------------------- +import re from terracotta import __version__ project = 'Terracotta' @@ -25,7 +26,7 @@ author = 'Dion Häfner, Philip Graae' # The short X.Y version -version = __version__ +version = re.match(r'(\d+\.\d+\.\d+)', __version__).group(1) # The full version, including alpha/beta/rc tags release = __version__ @@ -40,6 +41,7 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ + 'sphinx.ext.intersphinx', 'sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode', @@ -74,6 +76,10 @@ # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'friendly' +# -- Extension settings -------------------------------------------------------- + +autodoc_member_order = 'bysource' +intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} # -- Options for HTML output ------------------------------------------------- @@ -93,13 +99,15 @@ 'font_family': "'Lato', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', sans-serif", 'head_font_family': "'Lato', 'Garamond', 'Georgia', sans-serif", 'body_text': '#000', + 'sidebar_header': '#4B4032', + 'sidebar_text': '#49443E', 'github_banner': 'true', 'github_user': 'DHI-GRAS', 'github_repo': 'terracotta', 'github_button': 'true', 'github_type': 'star', 'travis_button': 'true', - 'codecov_button': 'true' + 'codecov_button': 'true', } diff --git a/docs/drivers/mysql.rst b/docs/drivers/mysql.rst new file mode 100644 index 00000000..700a7a88 --- /dev/null +++ b/docs/drivers/mysql.rst @@ -0,0 +1,7 @@ +MySQL +===== + +.. 
autoclass:: terracotta.drivers.mysql.MySQLDriver + :members: + :undoc-members: + :inherited-members: diff --git a/docs/drivers/sqlite-remote.rst b/docs/drivers/sqlite-remote.rst new file mode 100644 index 00000000..629a8467 --- /dev/null +++ b/docs/drivers/sqlite-remote.rst @@ -0,0 +1,8 @@ +Remote SQLite +============= + +.. autoclass:: terracotta.drivers.sqlite_remote.RemoteSQLiteDriver + :members: + :undoc-members: + :inherited-members: + :exclude-members: delete, insert, create \ No newline at end of file diff --git a/docs/drivers/sqlite.rst b/docs/drivers/sqlite.rst new file mode 100644 index 00000000..a167771a --- /dev/null +++ b/docs/drivers/sqlite.rst @@ -0,0 +1,7 @@ +SQLite +====== + +.. autoclass:: terracotta.drivers.sqlite.SQLiteDriver + :members: + :undoc-members: + :inherited-members: diff --git a/docs/example-ingestion-script.py b/docs/example-ingestion-script.py new file mode 100644 index 00000000..f7f4a34b --- /dev/null +++ b/docs/example-ingestion-script.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 + +import os +import re +import glob + +import tqdm +import boto3 +s3 = boto3.resource('s3') + +import terracotta as tc + +# settings +DB_NAME = 'terracotta.sqlite' +RASTER_GLOB = r'/path/to/rasters/*.tif' +RASTER_NAME_PATTERN = r'(?P\w{2})_(?P\w{5})_(?P\d{8})_(?P\w+).tif' +KEYS = ('sensor', 'tile', 'date', 'band') +KEY_DESCRIPTIONS = { + 'sensor': 'Sensor short name', + 'tile': 'Sentinel-2 tile ID', + 'date': 'Sensing date', + 'band': 'Band or index name' +} +S3_BUCKET = 'tc-testdata' +S3_RASTER_FOLDER = 'rasters' +S3_PATH = f's3://{S3_BUCKET}/{S3_RASTER_FOLDER}' + +driver = tc.get_driver(DB_NAME) + +# create an empty database if it doesn't exist +if not os.path.isfile(DB_NAME): + driver.create(KEYS, KEY_DESCRIPTIONS) + +# sanity check +assert driver.key_names == KEYS + +available_datasets = driver.get_datasets() +raster_files = list(glob.glob(RASTER_GLOB)) +pbar = tqdm.tqdm(raster_files) + +for raster_path in pbar: + pbar.set_postfix(file=raster_path) + 
+ raster_filename = os.path.basename(raster_path) + + # extract keys from filename + match = re.match(RASTER_NAME_PATTERN, raster_filename) + if match is None: + raise ValueError(f'Input file {raster_filename} does not match raster pattern') + + keys = match.groups() + + # skip already processed data + if keys in available_datasets: + continue + + with driver.connect(): + # since the rasters will be served from S3, we need to pass the correct remote path + driver.insert(keys, raster_path, override_path=f'{S3_PATH}/{raster_filename}') + s3.meta.client.upload_file(raster_path, S3_BUCKET, + f'{S3_RASTER_FOLDER}/{raster_filename}') + +# upload database to S3 +s3.meta.client.upload_file(DB_NAME, S3_BUCKET, DB_NAME) diff --git a/docs/get-started.rst b/docs/get-started.rst index 900fb8d6..51d43ce0 100644 --- a/docs/get-started.rst +++ b/docs/get-started.rst @@ -1,6 +1,8 @@ Get started =========== +.. _installation: + Installation ------------ @@ -20,7 +22,7 @@ can just run $ pip install -e . -in the root of this repository instead. +in the root of the Terracotta repository instead. Creating a raster database -------------------------- @@ -31,7 +33,7 @@ ingested into a database. There are two ways to populate this metadata store: 1. Through the CLI -~~~~~~~~~~~~~~~~~~ +++++++++++++++++++ A simple but limited way to build a database is to use the command line interface. All you need to do is to point Terracotta to a folder of @@ -39,7 +41,9 @@ interface. All you need to do is to point Terracotta to a folder of .. code:: bash - $ terracotta ingest /path/to/gtiffs/{sensor}_{name}_{date}_{band}.tif -o terracotta.sqlite + $ terracotta ingest \ + /path/to/gtiffs/{sensor}_{name}_{date}_{band}.tif \ + -o terracotta.sqlite This will create a new database with the keys ``sensor``, ``name``, ``date``, and ``band`` (in this order), and ingest all files matching @@ -52,7 +56,7 @@ For available options, see $ terracotta ingest --help 2. 
Using the Python API -~~~~~~~~~~~~~~~~~~~~~~~ ++++++++++++++++++++++++ Terracotta’s driver API gives you fine-grained control over ingestion and retrieval. Metadata can be computed at three different times: @@ -73,71 +77,11 @@ local directory. It extracts the appropriate keys from the file name, ingests them into a database, and pushes the rasters and the resulting database into an S3 bucket. -.. code:: python - - #!/usr/bin/env python3 - - import os - import re - import glob - - import tqdm - import boto3 - s3 = boto3.resource('s3') - - import terracotta as tc - - # settings - DB_NAME = 'terracotta.sqlite' - RASTER_GLOB = r'/path/to/rasters/*.tif' - RASTER_NAME_PATTERN = r'(?P\w{2})_(?P\w{5})_(?P\d{8})_(?P\w+).tif' - KEYS = ('sensor', 'tile', 'date', 'band') - KEY_DESCRIPTIONS = { - 'sensor': 'Sensor short name', - 'tile': 'Sentinel-2 tile ID', - 'date': 'Sensing date', - 'band': 'Band or index name' - } - S3_BUCKET = 'tc-testdata' - S3_RASTER_FOLDER = 'rasters' - S3_PATH = f's3://{S3_BUCKET}/{S3_RASTER_FOLDER}' - - driver = tc.get_driver(DB_NAME) - - # create an empty database if it doesn't exist - if not os.path.isfile(DB_NAME): - driver.create(KEYS, KEY_DESCRIPTIONS) - - # sanity check - assert driver.key_names == KEYS - - available_datasets = driver.get_datasets() - raster_files = list(glob.glob(RASTER_GLOB)) - pbar = tqdm.tqdm(raster_files) - - for raster_path in pbar: - pbar.set_postfix(file=raster_path) - - raster_filename = os.path.basename(raster_path) - - # extract keys from filename - match = re.match(RASTER_NAME_PATTERN, raster_filename) - if match is None: - raise ValueError(f'Input file {raster_filename} does not match raster pattern') - - keys = match.groups() - - # skip already processed data - if keys in available_datasets: - continue - - with driver.connect(): - # since the rasters will be served from S3, we need to pass the correct remote path - driver.insert(keys, raster_path, override_path=f'{S3_PATH}/{raster_filename}') - 
s3.meta.client.upload_file(raster_path, S3_BUCKET, f'{S3_RASTER_FOLDER}/{raster_filename}') +.. literalinclude:: example-ingestion-script.py + :language: python + :caption: example-ingestion-script.py - # upload database to S3 - s3.meta.client.upload_file(DB_NAME, S3_BUCKET, DB_NAME) +:download:`Download the script ` Note that the above script is just a simple example to show you some capabilities of the Terracotta Python API. More sophisticated solutions diff --git a/docs/index.rst b/docs/index.rst index 23e51bda..d2ce22ad 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,27 +6,32 @@ Welcome to Terracotta Use cases --------- -Terracotta covers three major use cases: - 1. Use it as data exploration tool to quickly serve up a folder containing GeoTiff images with ``terracotta serve``. -2. Make it your tile server backend on an existing webserver. Refer to - `the Flask - documentation `__ for - more information. You can ingest your data `ahead of - time <#ingestion>`__ (recommended) or on-demand. -3. Deploy it on serverless architectures such as AWS λ to serve tiles - from S3 buckets. This allows you to build apps that scale almost +2. :doc:`Make it your tile server backend on an existing webserver. ` + You can ingest your data `ahead of time <#ingestion>`__ (recommended) + or on-demand. +3. :doc:`Deploy it on serverless architectures such as AWS λ to serve tiles + from S3 buckets. ` This allows you to build apps that scale almost infinitely with minimal maintenance! To make it as easy as possible to deploy to AWS λ, we make use of the magic provided by - `Zappa `__. See `Deployment on - AWS <#deployment-to-aws-λ>`__ for more details. + `Zappa `__. + +Installation +------------ + +.. code-block:: bash + + $ pip install terracotta[recommended] + +Or see :ref:`our installation guide ` for conda-based +and development installations. Why Terracotta? --------------- There are many good reasons to ditch your ancient raster data workflow -and switch to Terracotta. 
Some of them are listed here: +and switch to Terracotta: - It is trivial to get going. Got a folder full of cloud-optimized GeoTiffs in different projections you want to have a look at in your diff --git a/docs/settings.rst b/docs/settings.rst index fa7b4e71..9128882d 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -9,15 +9,34 @@ Because Terracotta can either run locally, on a web server, or serverless, you c settings in several different ways: - Terracotta is fully configurable through environment variables that are prefixed with ``TC_``. - E.g., running ``export TC_UPSAMPLING_METHOD=cubic`` will set the corresponding setting + E.g., running + + .. code-block:: bash + + $ export TC_UPSAMPLING_METHOD=cubic + + will set the corresponding setting ``UPSAMPLING_METHOD`` to ``cubic`` in all Terracotta instances. This is particularly useful for serverless deployments. You can set list values in JSON array notation: - ``export TC_DEFAULT_TILE_SIZE=[128,128]``. -- All :ref:`command line functions ` accept the path to a TOML file via the ``-c`` flag - (e.g. ``terracotta -c config.toml``). + .. code-block:: bash + + $ export TC_DEFAULT_TILE_SIZE="[128,128]" + +- All :ref:`CLI commands ` accept the path to a TOML file via the ``-c`` flag. Example: + + .. code-block:: bash + + $ terracotta -c config.toml serve -d tc.sqlite -- If you are using the :ref:`Terracotta Python API `, you can call + where ``config.toml`` contains e.g. + + .. code-block:: none + + DRIVER_PATH = "root:password@myserver.com/terracotta" + DRIVER_PROVIDER = "mysql" + +- If you are using the :doc:`Terracotta Python API <api>`, you can call :func:`~terracotta.update_settings` directly. @@ -33,5 +52,12 @@ settings in several different ways: Available runtime settings -------------------------- +All runtime settings are contained in the following :class:`~typing.NamedTuple`. + +.. 
seealso:: + + To see the types and default values of the settings, + `have a look at the TerracottaSettings source code <_modules/terracotta/config.html#TerracottaSettings>`_. + .. autoclass:: terracotta.config.TerracottaSettings :members: \ No newline at end of file diff --git a/terracotta/__init__.py b/terracotta/__init__.py index 5af7fe1a..83638a9e 100644 --- a/terracotta/__init__.py +++ b/terracotta/__init__.py @@ -6,7 +6,7 @@ # get version try: from terracotta._version import version as __version__ # noqa: F401 -except ImportError: +except ImportError: # pragma: no cover # package is not installed raise RuntimeError( 'Terracotta has not been installed correctly. Please run `pip install -e .` or ' @@ -23,6 +23,22 @@ def update_settings(**new_config: Any) -> None: + """Update the global Terracotta runtime settings. + + Arguments: + + new_config: Options to override. Have to be valid Terracotta settings. + + Example: + + >>> import terracotta as tc + >>> tc.get_settings().DEFAULT_TILE_SIZE + (256, 256) + >>> tc.update_settings(DEFAULT_TILE_SIZE=[512, 512]) + >>> tc.get_settings().DEFAULT_TILE_SIZE + (512, 512) + + """ from terracotta.config import parse_config global _settings, _overwritten_settings current_config = {k: getattr(_settings, k) for k in _overwritten_settings} @@ -31,6 +47,15 @@ def update_settings(**new_config: Any) -> None: def get_settings() -> TerracottaSettings: + """Returns the current set of global runtime settings. + + Example: + + >>> import terracotta as tc + >>> tc.get_settings().DEBUG + False + + """ return _settings diff --git a/terracotta/config.py b/terracotta/config.py index 41694b47..85f9b87c 100644 --- a/terracotta/config.py +++ b/terracotta/config.py @@ -59,7 +59,7 @@ class TerracottaSettings(NamedTuple): DOWNSAMPLING_METHOD: str = 'nearest' -AVAILABLE_SETTINGS: Tuple[str, ...] = tuple(TerracottaSettings._field_types.keys()) +AVAILABLE_SETTINGS: Tuple[str, ...] 
= tuple(TerracottaSettings._fields) def _is_writable(path: str) -> bool: diff --git a/terracotta/drivers/__init__.py b/terracotta/drivers/__init__.py index 5ad4fa8d..e64e4960 100644 --- a/terracotta/drivers/__init__.py +++ b/terracotta/drivers/__init__.py @@ -3,34 +3,24 @@ Define an interface to retrieve Terracotta drivers. """ -from typing import Callable, Any, Union, Dict, Type -import functools +from typing import Union, Tuple, Dict, Type import urllib.parse as urlparse from pathlib import Path from terracotta.drivers.base import Driver - -def singleton(fun: Callable) -> Callable: - instance_cache: Dict[Any, Any] = {} - - @functools.wraps(fun) - def inner(*args: Any, **kwargs: Any) -> Any: - key = tuple(args) + tuple(kwargs.items()) - if key not in instance_cache: - instance_cache[key] = fun(*args, **kwargs) - return instance_cache[key] - - return inner +URLOrPathType = Union[str, Path] def load_driver(provider: str) -> Type[Driver]: if provider == 'sqlite-remote': from terracotta.drivers.sqlite_remote import RemoteSQLiteDriver return RemoteSQLiteDriver + if provider == 'mysql': from terracotta.drivers.mysql import MySQLDriver return MySQLDriver + if provider == 'sqlite': from terracotta.drivers.sqlite import SQLiteDriver return SQLiteDriver @@ -44,17 +34,54 @@ def auto_detect_provider(url_or_path: Union[str, Path]) -> str: scheme = parsed_path.scheme if scheme == 's3': return 'sqlite-remote' + if scheme == 'mysql': return 'mysql' return 'sqlite' -@singleton -def get_driver(url_or_path: Union[str, Path], provider: str = None) -> Driver: +_DRIVER_CACHE: Dict[Tuple[URLOrPathType, str], Driver] = {} + + +def get_driver(url_or_path: URLOrPathType, provider: str = None) -> Driver: + """Retrieve Terracotta driver instance for the given path. + + This function always returns the same instance for identical inputs. + + Warning: + + Always retrieve Driver instances through this function instead of + instantiating them directly to prevent caching issues. 
+ + Arguments: + + url_or_path: A path identifying the database to connect to. + The expected format depends on the driver provider. + provider: Driver provider to use (one of sqlite, sqlite-remote, mysql; + default: auto-detect). + + Example: + + >>> import terracotta as tc + >>> tc.get_driver('tc.sqlite') + SQLiteDriver('/home/terracotta/tc.sqlite') + >>> tc.get_driver('mysql://root@localhost/tc') + MySQLDriver('mysql://root@localhost:3306/tc') + >>> # pass provider if path is given in a non-standard way + >>> tc.get_driver('root@localhost/tc', provider='mysql') + MySQLDriver('mysql://root@localhost:3306/tc') + + """ if provider is None: # try and auto-detect provider = auto_detect_provider(url_or_path) - DriverClass = load_driver(provider) + if isinstance(url_or_path, Path) or provider == 'sqlite': + url_or_path = Path(url_or_path).resolve() + + cache_key = (url_or_path, provider) + if cache_key not in _DRIVER_CACHE: + DriverClass = load_driver(provider) + _DRIVER_CACHE[cache_key] = DriverClass(url_or_path) - return DriverClass(url_or_path) + return _DRIVER_CACHE[cache_key] diff --git a/terracotta/drivers/mysql.py b/terracotta/drivers/mysql.py index 911c6c22..82a66b38 100644 --- a/terracotta/drivers/mysql.py +++ b/terracotta/drivers/mysql.py @@ -28,6 +28,11 @@ T = TypeVar('T') +_ERROR_ON_CONNECT = ( + 'Could not retrieve version from database. Make sure that the given path points ' + 'to a valid Terracotta database, and that you ran driver.create().' 
+) + @contextlib.contextmanager def convert_exceptions(msg: str) -> Iterator: @@ -130,7 +135,7 @@ def _parse_db_name(con_params: ParseResult) -> str: return path @requires_connection - @convert_exceptions('Could not retrieve version from database') + @convert_exceptions(_ERROR_ON_CONNECT) def _get_db_version(self) -> str: """Getter for db_version""" cursor = self._cursor @@ -386,7 +391,9 @@ def insert(self, cursor = self._cursor if len(keys) != len(self.key_names): - raise exceptions.InvalidKeyError(f'Not enough keys (available keys: {self.key_names})') + raise exceptions.InvalidKeyError( + f'Got wrong number of keys (available keys: {self.key_names})' + ) if override_path is None: override_path = filepath @@ -415,7 +422,9 @@ def delete(self, keys: Union[Sequence[str], Mapping[str, str]]) -> None: cursor = self._cursor if len(keys) != len(self.key_names): - raise exceptions.InvalidKeyError(f'Not enough keys (available keys: {self.key_names})') + raise exceptions.InvalidKeyError( + f'Got wrong number of keys (available keys: {self.key_names})' + ) keys = self._key_dict_to_sequence(keys) key_dict = dict(zip(self.key_names, keys)) diff --git a/terracotta/drivers/raster_base.py b/terracotta/drivers/raster_base.py index 6b68c744..54584a80 100644 --- a/terracotta/drivers/raster_base.py +++ b/terracotta/drivers/raster_base.py @@ -19,13 +19,13 @@ from cachetools import cachedmethod, LRUCache from affine import Affine -if TYPE_CHECKING: +if TYPE_CHECKING: # pragma: no cover from rasterio.io import DatasetReader # noqa: F401 try: from crick import TDigest, SummaryStats has_crick = True -except ImportError: +except ImportError: # pragma: no cover has_crick = False from terracotta import get_settings, exceptions diff --git a/terracotta/drivers/sqlite.py b/terracotta/drivers/sqlite.py index fd42adae..f566b133 100644 --- a/terracotta/drivers/sqlite.py +++ b/terracotta/drivers/sqlite.py @@ -21,6 +21,11 @@ from terracotta.drivers.base import requires_connection from 
terracotta.drivers.raster_base import RasterDriver +_ERROR_ON_CONNECT = ( + 'Could not retrieve version from database. Make sure that the given path points ' + 'to a valid Terracotta database, and that you ran driver.create().' +) + @contextlib.contextmanager def convert_exceptions(msg: str) -> Iterator: @@ -76,13 +81,18 @@ def __init__(self, path: Union[str, Path]) -> None: self._connection: Connection self._connected = False - super().__init__(path) + super().__init__(os.path.realpath(path)) @contextlib.contextmanager def connect(self, check: bool = True) -> Iterator: try: close = False if not self._connected: + # if check and not os.path.isfile(self.path): + # raise exceptions.InvalidDatabaseError( + # f'Database file {self.path} does not exist ' + # f'(run driver.create() before connecting to a new database)' + # ) with convert_exceptions('Unable to connect to database'): self._connection = sqlite3.connect( self.path, timeout=self.DB_CONNECTION_TIMEOUT @@ -98,6 +108,7 @@ def connect(self, check: bool = True) -> Iterator: except Exception: self._connection.rollback() raise + finally: if close: self._connection.commit() @@ -105,7 +116,7 @@ def connect(self, check: bool = True) -> Iterator: self._connected = False @requires_connection - @convert_exceptions('Could not retrieve version from database') + @convert_exceptions(_ERROR_ON_CONNECT) def _get_db_version(self) -> str: """Getter for db_version""" conn = self._connection @@ -116,12 +127,6 @@ def _get_db_version(self) -> str: def _connection_callback(self) -> None: """Called after opening a new connection""" - if not os.path.isfile(self.path): - raise exceptions.InvalidDatabaseError( - f'Database file {self.path} does not exist ' - f'(run driver.create() before connecting to a new database)' - ) - # check for version compatibility def versiontuple(version_string: str) -> Sequence[str]: return version_string.split('.') @@ -271,7 +276,9 @@ def get_metadata(self, keys: Union[Sequence[str], Mapping[str, str]]) -> 
Dict[st keys = tuple(self._key_dict_to_sequence(keys)) if len(keys) != len(self.key_names): - raise exceptions.InvalidKeyError('Got wrong number of keys') + raise exceptions.InvalidKeyError( + f'Got wrong number of keys (available keys: {self.key_names})' + ) conn = self._connection @@ -307,7 +314,9 @@ def insert(self, conn = self._connection if len(keys) != len(self.key_names): - raise exceptions.InvalidKeyError(f'Not enough keys (available keys: {self.key_names})') + raise exceptions.InvalidKeyError( + f'Got wrong number of keys (available keys: {self.key_names})' + ) if override_path is None: override_path = filepath @@ -335,7 +344,9 @@ def delete(self, keys: Union[Sequence[str], Mapping[str, str]]) -> None: conn = self._connection if len(keys) != len(self.key_names): - raise exceptions.InvalidKeyError(f'Not enough keys (available keys: {self.key_names})') + raise exceptions.InvalidKeyError( + f'Got wrong number of keys (available keys: {self.key_names})' + ) keys = self._key_dict_to_sequence(keys) key_dict = dict(zip(self.key_names, keys)) diff --git a/terracotta/drivers/sqlite_remote.py b/terracotta/drivers/sqlite_remote.py index b9d18dc1..542d11a1 100644 --- a/terracotta/drivers/sqlite_remote.py +++ b/terracotta/drivers/sqlite_remote.py @@ -106,9 +106,4 @@ def delete(self, *args: Any, **kwargs: Any) -> None: def __del__(self) -> None: """Clean up temporary database upon exit""" - rm = self.__rm - try: - rm(self.path) - except AttributeError: - # object is deleted before self.path is declared - pass + self.__rm(self.path) diff --git a/terracotta/image.py b/terracotta/image.py index f167c177..beaf0b4b 100644 --- a/terracotta/image.py +++ b/terracotta/image.py @@ -84,6 +84,8 @@ def array_to_png(img_data: Array, )).tobytes() assert palette.shape == (3 * 256,), palette.shape + else: + raise ValueError('Input array must have 2 or 3 dimensions') if isinstance(img_data, np.ma.MaskedArray): img_data = img_data.filled(0) diff --git a/terracotta/logs.py 
b/terracotta/logs.py index e0225b8c..74da8ee7 100644 --- a/terracotta/logs.py +++ b/terracotta/logs.py @@ -10,7 +10,7 @@ try: import colorlog use_colors = True -except ImportError: +except ImportError: # pragma: no cover use_colors = False diff --git a/terracotta/scripts/cli.py b/terracotta/scripts/cli.py index 1f4604ea..be61aa9a 100644 --- a/terracotta/scripts/cli.py +++ b/terracotta/scripts/cli.py @@ -22,7 +22,15 @@ def cli(ctx: click.Context, config: Mapping[str, Any] = None, loglevel: str = None) -> None: - """The Terracotta command line interface""" + """The command line interface for the Terracotta tile server. + + All flags must be passed before specifying a subcommand. + + Example: + + $ terracotta -c config.toml connect localhost:5000 + + """ if ctx.invoked_subcommand is None: click.echo(ctx.get_help()) diff --git a/terracotta/scripts/connect.py b/terracotta/scripts/connect.py index 336e9d1a..e6beb0cb 100644 --- a/terracotta/scripts/connect.py +++ b/terracotta/scripts/connect.py @@ -28,7 +28,12 @@ def connect(terracotta_hostname: str, no_browser: bool = False, port: int = None) -> None: """Connect to a running Terracotta and interactively explore data in it. - First argument is hostname and port to connect to (e.g. localhost:5000). + First argument is hostname and port to connect to. + + Example: + + $ terracotta connect localhost:5000 + """ from terracotta.client.flask_api import create_app diff --git a/terracotta/scripts/ingest.py b/terracotta/scripts/ingest.py index 7d258faf..974ec687 100644 --- a/terracotta/scripts/ingest.py +++ b/terracotta/scripts/ingest.py @@ -32,20 +32,20 @@ def ingest(raster_pattern: RasterPatternType, skip_metadata: bool = False, rgb_key: str = None, quiet: bool = False) -> None: - """Ingest a collection of raster files into a SQLite database. + """Ingest a collection of raster files into a (new or existing) SQLite database. First argument is a format pattern defining paths and keys of all raster files. 
Example: - terracotta create-database /path/to/rasters/{name}/{date}_{band}{}.tif -o out.sqlite + $ terracotta create-database /path/to/rasters/{name}/{date}_{band}{}.tif -o out.sqlite The empty group {} is replaced by a wildcard matching anything (similar to * in glob patterns). Existing datasets are silently overwritten. - This command only supports the creation of a simple SQLite database without any additional - metadata. For more sophisticated use cases use the Terracotta Python API. + This command only supports the creation of a simple, local SQLite database without any + additional metadata. For more sophisticated use cases use the Terracotta Python API. """ from terracotta import get_driver diff --git a/terracotta/scripts/optimize_rasters.py b/terracotta/scripts/optimize_rasters.py index 55e708e4..9a4ad6d9 100644 --- a/terracotta/scripts/optimize_rasters.py +++ b/terracotta/scripts/optimize_rasters.py @@ -149,7 +149,7 @@ def optimize_rasters(raster_files: Sequence[Sequence[Path]], Example: - terracotta optimize-rasters rasters/*.tif -o cloud-optimized/ + $ terracotta optimize-rasters rasters/*.tif -o cloud-optimized/ Note that all rasters may only contain a single band. """ diff --git a/terracotta/scripts/serve.py b/terracotta/scripts/serve.py index 99a8b464..4bdcf624 100644 --- a/terracotta/scripts/serve.py +++ b/terracotta/scripts/serve.py @@ -46,7 +46,7 @@ def serve(database: str = None, Example: - terracotta serve -r /path/to/rasters/{name}/{date}_{band}_{}.tif + $ terracotta serve -r /path/to/rasters/{name}/{date}_{band}_{}.tif The empty group {} is replaced by a wildcard matching anything (similar to * in glob patterns). 
diff --git a/terracotta/server/flask_api.py b/terracotta/server/flask_api.py index fbdd0d06..79629db5 100644 --- a/terracotta/server/flask_api.py +++ b/terracotta/server/flask_api.py @@ -107,7 +107,6 @@ def create_app(debug: bool = False, profile: bool = False) -> Flask: if profile: from werkzeug.contrib.profiler import ProfilerMiddleware - new_app.config['PROFILE'] = True new_app.wsgi_app = ProfilerMiddleware(new_app.wsgi_app, restrictions=[30]) return new_app diff --git a/tests/drivers/test_drivers.py b/tests/drivers/test_drivers.py index ef9ec9dc..45d63d03 100644 --- a/tests/drivers/test_drivers.py +++ b/tests/drivers/test_drivers.py @@ -12,6 +12,14 @@ def test_auto_detect(driver_path, provider): from terracotta import drivers db = drivers.get_driver(driver_path) assert db.__class__.__name__ == DRIVER_CLASSES[provider] + assert drivers.get_driver(driver_path, provider=provider) is db + + +def test_get_driver_invalid(): + from terracotta import drivers + with pytest.raises(ValueError) as exc: + drivers.get_driver('', provider='foo') + assert 'Unknown database provider' in str(exc.value) @pytest.mark.parametrize('provider', DRIVERS) @@ -46,7 +54,7 @@ def test_creation_invalid(driver_path, provider): with pytest.raises(exceptions.InvalidKeyError) as exc: db.create(keys) - assert 'must be alphanumeric' in str(exc.value) + assert 'must be alphanumeric' in str(exc.value) @pytest.mark.parametrize('provider', DRIVERS) @@ -58,7 +66,7 @@ def test_creation_invalid_description(driver_path, provider): with pytest.raises(exceptions.InvalidKeyError) as exc: db.create(keys, key_descriptions={'unknown_key': 'blah'}) - assert 'contains unknown keys' in str(exc.value) + assert 'contains unknown keys' in str(exc.value) @pytest.mark.parametrize('provider', DRIVERS) @@ -70,7 +78,7 @@ def test_creation_reserved_names(driver_path, provider): with pytest.raises(exceptions.InvalidKeyError) as exc: db.create(keys) - assert 'key names cannot be one of' in str(exc.value) + assert 'key names 
cannot be one of' in str(exc.value) @pytest.mark.parametrize('provider', DRIVERS) @@ -82,7 +90,7 @@ def test_connect_before_create(driver_path, provider): with db.connect(): pass - assert 'run driver.create()' in str(exc.value) + assert 'ran driver.create()' in str(exc.value) @pytest.mark.parametrize('provider', DRIVERS) @@ -132,4 +140,4 @@ def test_version_conflict(driver_path, provider, raster_file, monkeypatch): with db.connect(check=True): pass - assert fake_version in str(exc.value) + assert fake_version in str(exc.value) diff --git a/tests/drivers/test_raster_drivers.py b/tests/drivers/test_raster_drivers.py index ae8a2858..b7cdd6a5 100644 --- a/tests/drivers/test_raster_drivers.py +++ b/tests/drivers/test_raster_drivers.py @@ -70,7 +70,7 @@ def test_where(driver_path, provider, raster_file): with pytest.raises(exceptions.InvalidKeyError) as exc: db.get_datasets(where=dict(unknown='foo')) - assert 'unrecognized keys' in str(exc.value) + assert 'unrecognized keys' in str(exc.value) @pytest.mark.parametrize('provider', DRIVERS) @@ -174,15 +174,15 @@ def test_wrong_key_number(driver_path, provider, raster_file): with pytest.raises(exceptions.InvalidKeyError) as exc: db.get_metadata(['a', 'b']) - assert 'not enough keys' in str(exc.value) + assert 'wrong number of keys' in str(exc.value) with pytest.raises(exceptions.InvalidKeyError) as exc: db.insert(['a', 'b'], '') - assert 'not enough keys' in str(exc.value) + assert 'wrong number of keys' in str(exc.value) with pytest.raises(exceptions.InvalidKeyError) as exc: db.delete(['a', 'b']) - assert 'not enough keys' in str(exc.value) + assert 'wrong number of keys' in str(exc.value) @pytest.mark.parametrize('provider', DRIVERS) diff --git a/tests/handlers/test_rgb.py b/tests/handlers/test_rgb.py index 6179ccbd..29310bce 100644 --- a/tests/handlers/test_rgb.py +++ b/tests/handlers/test_rgb.py @@ -28,6 +28,14 @@ def test_rgb_invalid_keys(use_testdb, raster_file_xyz): rgb.rgb(['val21', 'x', 'y', 'z'], ['val22', 'val23', 
'val24'], raster_file_xyz) +def test_rgb_invalid_args(use_testdb, raster_file_xyz): + from terracotta import exceptions + from terracotta.handlers import rgb + + with pytest.raises(exceptions.InvalidArgumentsError): + rgb.rgb(['val21', 'x'], ['val22', 'val23', 'val24'], raster_file_xyz, stretch_ranges=[]) + + def test_rgb_invalid_rgb_values(use_testdb, raster_file_xyz): from terracotta import exceptions from terracotta.handlers import rgb diff --git a/tests/server/test_flask_api.py b/tests/server/test_flask_api.py index f04d46de..5569f1b3 100644 --- a/tests/server/test_flask_api.py +++ b/tests/server/test_flask_api.py @@ -290,6 +290,11 @@ def test_get_colormap(client): assert len(json.loads(rv.data)['colormap']) == 100 +def test_get_colormap_invalid(client): + rv = client.get('/colormap?stretch_range=[0,1') + assert rv.status_code == 400 + + def test_get_colormap_extra_args(client): rv = client.get('/colormap?stretch_range=[0,1]&num_values=100&foo=bar&baz=quz') assert rv.status_code == 200 diff --git a/tests/test_config.py b/tests/test_config.py index 1b77c80f..4d2af340 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -3,6 +3,13 @@ import pytest +def test_schema_integrity(): + from terracotta import config + settings_fields = config.TerracottaSettings._fields + schema_fields = config.SettingSchema._declared_fields + assert set(settings_fields) == set(schema_fields) + + def test_env_config(monkeypatch): from terracotta import config @@ -31,6 +38,12 @@ def test_env_config_invalid(monkeypatch): m.setenv('TC_DEBUG', 'foo') # not a boolean with pytest.raises(ValueError): config.parse_config() + + with monkeypatch.context() as m: + m.setenv('TC_REMOTE_DB_CACHE_DIR', '/foo/test.sqlite') # non-existing folder + with pytest.raises(ValueError): + config.parse_config() + assert True diff --git a/tests/test_image.py b/tests/test_image.py index 19144211..55a7d713 100644 --- a/tests/test_image.py +++ b/tests/test_image.py @@ -21,15 +21,15 @@ def 
test_array_to_png_singleband_invalid(): with pytest.raises(exceptions.InvalidArgumentsError) as exc: image.array_to_png(np.zeros((20, 20)), colormap='unknown') - assert 'invalid color map' in exc.value + assert 'invalid color map' in str(exc.value) with pytest.raises(exceptions.InvalidArgumentsError) as exc: image.array_to_png(np.zeros((20, 20)), colormap=[(0, 0, 0, 0)] * 1000) - assert 'must contain less' in exc.value + assert 'must contain less' in str(exc.value) with pytest.raises(ValueError) as exc: image.array_to_png(np.zeros((20, 20)), colormap=[(0, 0, 0)] * 10) - assert 'must have shape' in exc.value + assert 'must have shape' in str(exc.value) def test_array_to_png_rgb(): @@ -50,11 +50,15 @@ def test_array_to_png_rgb_invalid(): too_many_bands = np.random.randint(0, 256, size=(256, 512, 4), dtype='uint8') with pytest.raises(ValueError) as exc: image.array_to_png(too_many_bands) - assert 'must have three bands' in exc.value + assert 'must have three bands' in str(exc.value) with pytest.raises(ValueError) as exc: image.array_to_png(np.zeros((20, 20, 3)), colormap='viridis') - assert 'Colormap argument cannot be given' in exc.value + assert 'Colormap argument cannot be given' in str(exc.value) + + with pytest.raises(ValueError) as exc: + image.array_to_png(np.array([])) + assert '2 or 3 dimensions' in str(exc.value) def test_contrast_stretch(): @@ -100,4 +104,4 @@ def test_label_invalid(): data = np.array([15, 16, 17]) with pytest.raises(ValueError) as exc: image.label(data, list(range(1000))) - assert 'more than 255 labels' in exc.value + assert 'more than 255 labels' in str(exc.value) From edfaffc5b5362efbca9b05eeba6e0881ff26f57c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dion=20H=C3=A4fner?= Date: Tue, 27 Nov 2018 16:15:20 +0100 Subject: [PATCH 2/6] fix benchmarks and raster cache (doh) --- terracotta/drivers/raster_base.py | 7 +++++-- terracotta/handlers/metadata.py | 4 ++-- tests/benchmarks.py | 23 +++++++++-------------- 3 files changed, 16 insertions(+), 
18 deletions(-) diff --git a/terracotta/drivers/raster_base.py b/terracotta/drivers/raster_base.py index 54584a80..cd131f5f 100644 --- a/terracotta/drivers/raster_base.py +++ b/terracotta/drivers/raster_base.py @@ -12,7 +12,6 @@ import operator import logging import math -import sys import warnings import numpy as np @@ -49,7 +48,10 @@ class RasterDriver(Driver): @abstractmethod def __init__(self, *args: Any, **kwargs: Any) -> None: settings = get_settings() - self._raster_cache = LRUCache(settings.RASTER_CACHE_SIZE, getsizeof=sys.getsizeof) + self._raster_cache = LRUCache( + settings.RASTER_CACHE_SIZE, + getsizeof=operator.attrgetter('nbytes') + ) self._executor = concurrent.futures.ThreadPoolExecutor(max_workers=3) super().__init__(*args, **kwargs) @@ -474,6 +476,7 @@ def get_raster_tile(self, upsampling_method=settings.UPSAMPLING_METHOD, downsampling_method=settings.DOWNSAMPLING_METHOD ) + print(self._raster_cache.currsize) if asynchronous: return self._executor.submit(task) diff --git a/terracotta/handlers/metadata.py b/terracotta/handlers/metadata.py index 14dfb2ac..cf05c199 100644 --- a/terracotta/handlers/metadata.py +++ b/terracotta/handlers/metadata.py @@ -3,7 +3,7 @@ Handle /metadata API endpoint. 
""" -from typing import Mapping, Sequence, Any, Union # noqa: F401 +from typing import Mapping, Sequence, Dict, Any, Union from collections import OrderedDict from terracotta import get_settings, get_driver @@ -11,7 +11,7 @@ @trace('metadata_handler') -def metadata(keys: Union[Sequence[str], Mapping[str, str]]) -> 'OrderedDict[str, Any]': +def metadata(keys: Union[Sequence[str], Mapping[str, str]]) -> Dict[str, Any]: """Returns all metadata for a single dataset""" settings = get_settings() driver = get_driver(settings.DRIVER_PATH, provider=settings.DRIVER_PROVIDER) diff --git a/tests/benchmarks.py b/tests/benchmarks.py index 86648806..ce8c9322 100644 --- a/tests/benchmarks.py +++ b/tests/benchmarks.py @@ -16,10 +16,12 @@ @pytest.fixture(scope='session') def benchmark_database(big_raster_file_nodata, big_raster_file_mask, tmpdir_factory): - from terracotta import get_driver + from terracotta import get_driver, update_settings keys = ['type', 'band'] + update_settings(RASTER_CACHE_SIZE=0) + dbpath = tmpdir_factory.mktemp('db').join('db-readonly.sqlite') driver = get_driver(dbpath, provider='sqlite') driver.create(keys) @@ -58,8 +60,7 @@ def test_bench_rgb(benchmark, zoom, resampling, big_raster_file_nodata, benchmar update_settings( DRIVER_PATH=str(benchmark_database), UPSAMPLING_METHOD=resampling, - DOWNSAMPLING_METHOD=resampling, - RASTER_CACHE_SIZE=0 + DOWNSAMPLING_METHOD=resampling ) zoom_level = ZOOM_XYZ[zoom] @@ -79,10 +80,7 @@ def test_bench_rgb_out_of_bounds(benchmark, big_raster_file_nodata, benchmark_da from terracotta.server import create_app from terracotta import update_settings - update_settings( - DRIVER_PATH=str(benchmark_database), - RASTER_CACHE_SIZE=0 - ) + update_settings(DRIVER_PATH=str(benchmark_database)) x, y, z = 0, 0, 20 @@ -97,13 +95,12 @@ def test_bench_rgb_out_of_bounds(benchmark, big_raster_file_nodata, benchmark_da @pytest.mark.parametrize('zoom', ZOOM_XYZ.keys()) def test_bench_singleband(benchmark, zoom, resampling, 
big_raster_file_nodata, benchmark_database): from terracotta.server import create_app - from terracotta import update_settings + from terracotta import update_settings, get_driver update_settings( DRIVER_PATH=str(benchmark_database), UPSAMPLING_METHOD=resampling, - DOWNSAMPLING_METHOD=resampling, - RASTER_CACHE_SIZE=0 + DOWNSAMPLING_METHOD=resampling ) zoom_level = ZOOM_XYZ[zoom] @@ -117,16 +114,14 @@ def test_bench_singleband(benchmark, zoom, resampling, big_raster_file_nodata, b rv = benchmark(client.get, f'/singleband/nodata/1/preview.png') assert rv.status_code == 200 + assert not len(get_driver(str(benchmark_database))._raster_cache) def test_bench_singleband_out_of_bounds(benchmark, benchmark_database): from terracotta.server import create_app from terracotta import update_settings - update_settings( - DRIVER_PATH=str(benchmark_database), - RASTER_CACHE_SIZE=0 - ) + update_settings(DRIVER_PATH=str(benchmark_database)) x, y, z = 0, 0, 20 From ae4a31f72bd99a7b459bbc8f4249102829656a30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dion=20H=C3=A4fner?= Date: Wed, 28 Nov 2018 17:50:02 +0100 Subject: [PATCH 3/6] more docs more docs --- docs/api.rst | 39 ++++--- docs/conf.py | 12 ++- docs/drivers/mysql.rst | 7 -- docs/drivers/sqlite-remote.rst | 8 -- docs/drivers/sqlite.rst | 7 -- docs/get-started.rst | 118 +++++++++++++++++---- docs/settings.rst | 9 +- docs/tutorials/windows.rst | 157 ++++++++++++++++++++++++++++ terracotta/drivers/__init__.py | 4 +- terracotta/drivers/base.py | 124 +++++++++++++++++----- terracotta/drivers/mysql.py | 97 ++++++++++------- terracotta/drivers/raster_base.py | 121 +++++++++++++++++---- terracotta/drivers/sqlite.py | 92 +++++++++------- terracotta/drivers/sqlite_remote.py | 37 +++++-- terracotta/xyz.py | 3 +- 15 files changed, 636 insertions(+), 199 deletions(-) delete mode 100644 docs/drivers/mysql.rst delete mode 100644 docs/drivers/sqlite-remote.rst delete mode 100644 docs/drivers/sqlite.rst diff --git a/docs/api.rst b/docs/api.rst index 
3cdffdb6..1845dd43 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -8,30 +8,37 @@ Get and set runtime settings .. autofunction:: terracotta.update_settings +.. _drivers: + Get a driver instance --------------------- .. autofunction:: terracotta.get_driver -Driver interface ----------------- - -.. seealso:: - - The following class defines the common interface for all Terracotta - drivers. For a reference on a specific drivers refer to :ref:`available-drivers`. +SQLite driver +------------- -.. autoclass:: terracotta.drivers.base.Driver +.. autoclass:: terracotta.drivers.sqlite.SQLiteDriver :members: + :undoc-members: + :special-members: __init__ + :inherited-members: -.. _available-drivers: +Remote SQLite driver +-------------------- -Available drivers ------------------ +.. autoclass:: terracotta.drivers.sqlite_remote.RemoteSQLiteDriver + :members: + :undoc-members: + :special-members: __init__ + :inherited-members: + :exclude-members: delete, insert, create -.. toctree:: - :maxdepth: 1 +MySQL driver +------------ - drivers/sqlite - drivers/sqlite-remote - drivers/mysql +.. 
autoclass:: terracotta.drivers.mysql.MySQLDriver + :members: + :undoc-members: + :special-members: __init__ + :inherited-members: diff --git a/docs/conf.py b/docs/conf.py index 319f3a71..8e3a4cb7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -78,8 +78,10 @@ # -- Extension settings -------------------------------------------------------- -autodoc_member_order = 'bysource' -intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} +intersphinx_mapping = { + 'python': ('https://docs.python.org/3', None), + 'numpy': ('http://docs.scipy.org/doc/numpy/', None), +} # -- Options for HTML output ------------------------------------------------- @@ -96,8 +98,8 @@ 'description': 'A light-weight, versatile XYZ tile server built with Flask and Rasterio', 'code_font_family': "'Roboto Mono', 'Consolas', 'Menlo', 'Deja Vu Sans Mono', " "'Bitstream Vera Sans Mono', monospace", - 'font_family': "'Lato', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', sans-serif", - 'head_font_family': "'Lato', 'Garamond', 'Georgia', sans-serif", + 'font_family': "'Lato', Arial, sans-serif", + 'head_font_family': "'Lato', Arial, sans-serif", 'body_text': '#000', 'sidebar_header': '#4B4032', 'sidebar_text': '#49443E', @@ -107,7 +109,7 @@ 'github_button': 'true', 'github_type': 'star', 'travis_button': 'true', - 'codecov_button': 'true', + 'codecov_button': 'true' } diff --git a/docs/drivers/mysql.rst b/docs/drivers/mysql.rst deleted file mode 100644 index 700a7a88..00000000 --- a/docs/drivers/mysql.rst +++ /dev/null @@ -1,7 +0,0 @@ -MySQL -===== - -.. autoclass:: terracotta.drivers.mysql.MySQLDriver - :members: - :undoc-members: - :inherited-members: diff --git a/docs/drivers/sqlite-remote.rst b/docs/drivers/sqlite-remote.rst deleted file mode 100644 index 629a8467..00000000 --- a/docs/drivers/sqlite-remote.rst +++ /dev/null @@ -1,8 +0,0 @@ -Remote SQLite -============= - -.. 
autoclass:: terracotta.drivers.sqlite_remote.RemoteSQLiteDriver - :members: - :undoc-members: - :inherited-members: - :exclude-members: delete, insert, create \ No newline at end of file diff --git a/docs/drivers/sqlite.rst b/docs/drivers/sqlite.rst deleted file mode 100644 index a167771a..00000000 --- a/docs/drivers/sqlite.rst +++ /dev/null @@ -1,7 +0,0 @@ -SQLite -====== - -.. autoclass:: terracotta.drivers.sqlite.SQLiteDriver - :members: - :undoc-members: - :inherited-members: diff --git a/docs/get-started.rst b/docs/get-started.rst index 51d43ce0..396ac717 100644 --- a/docs/get-started.rst +++ b/docs/get-started.rst @@ -7,23 +7,79 @@ Installation ------------ On most systems, the easiest way to install Terracotta is `through the -Conda package manager `__. After -installing ``conda``, the following command creates a new environment -containing all dependencies and Terracotta: +Conda package manager `__. Just +install ``conda``, clone the repository, and execute the following command +to create a new environment containing all dependencies and Terracotta: -.. code:: bash +.. code-block:: bash $ conda env create -f environment.yml If you already have a Python 3.6 installation that you want to use, you can just run -.. code:: bash +.. code-block:: bash $ pip install -e . in the root of the Terracotta repository instead. +.. seealso:: + + If you are using Windows 10 and find yourself struggling with installing + Terracotta, :doc:`check out our Windows 10 installation guide `! + + +Usage in a nutshell +------------------- + +The simplest way to use Terracotta is to cycle through the following commands: + +1. :doc:`terracotta optimize-rasters ` to + pre-process your raster files; +2. :doc:`terracotta ingest ` to create a database; +3. :doc:`terracotta serve ` to spawn a server; and +4. :doc:`terracotta connect ` to connect to this server. + +The following sections guide you through these steps in more detail. 
+ + +Data exploration through Terracotta +----------------------------------- + +If you have some raster files lying around (e.g. in GeoTiff format), +you can use Terracotta to serve them up. + +Assume you are in a folder containing some files named with the pattern +:file:`S2A__.tif`. You can start a Terracotta server via + +.. code-block:: bash + + $ terracotta serve -r {}_{date}_{band}.tif + +.. note:: + + Terracotta profits heavily from the cloud-optimized GeoTiff format. + If your raster files are not cloud-optimized or you are unsure, + you can preprocess them with + :doc:`terracotta optimize-rasters `. + +which will serve your data at ``http://localhost:5000``. Try the following +URLs and see what happens: + +- `http://localhost:5000/keys`__ +- `http://localhost:5000/datasets`__ +- `http://localhost:5000/apidoc`__ + +Because it is cumbersome to explore a Terracotta instance by manually +constructing URLs, we have built a tool that lets you inspect it +interactively: + +.. code-block:: bash + + $ terracotta connect localhost:5000 + + Creating a raster database -------------------------- @@ -35,11 +91,11 @@ store: 1. Through the CLI ++++++++++++++++++ -A simple but limited way to build a database is to use the command line -interface. All you need to do is to point Terracotta to a folder of -(cloud-optimized) GeoTiffs: +A simple but limited way to build a database is to use +:doc:`terracotta ingest `. All you need to do is +to point Terracotta to a folder of (cloud-optimized) GeoTiffs: -.. code:: bash +.. code-block:: bash $ terracotta ingest \ /path/to/gtiffs/{sensor}_{name}_{date}_{band}.tif \ @@ -51,15 +107,16 @@ the given pattern into it. For available options, see -.. code:: bash +.. code-block:: bash $ terracotta ingest --help 2. Using the Python API +++++++++++++++++++++++ -Terracotta’s driver API gives you fine-grained control over ingestion -and retrieval. 
Metadata can be computed at three different times: +:ref:`Terracotta’s driver API ` gives you fine-grained control +over ingestion and retrieval. Metadata can be computed at three +different times: 1. Automatically during a call to ``driver.insert`` (fine for most applications); @@ -83,13 +140,34 @@ database into an S3 bucket. :download:`Download the script ` -Note that the above script is just a simple example to show you some -capabilities of the Terracotta Python API. More sophisticated solutions -could e.g. attach additional metadata to database entries, or accept -parameters from the command line. +.. note:: -Serving data ------------- + The above script is just a simple example to show you some + capabilities of the Terracotta Python API. More sophisticated solutions + could e.g. attach additional metadata to database entries, process + many rasters in parallel, or accept parameters from the command line. + + +Serving data from a raster database +----------------------------------- + +After creating a database, you can use +:doc:`terracotta serve ` to serve the rasters +inserted into it: + +.. code-block:: bash + + $ terracotta serve -d /path/to/database.sqlite + +To explore the server, you can once again use +:doc:`terracotta connect `: + +.. code-block:: bash + + $ terracotta connect localhost:5000 + +.. note:: -Connecting to a running Terracotta server ------------------------------------------ \ No newline at end of file + The server spawned by ``terracotta serve`` is intended for development + and data exploration only. For sophisticated production deployments, + :doc:`have a look at our tutorials `. diff --git a/docs/settings.rst b/docs/settings.rst index 9128882d..76cdf52f 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -15,9 +15,9 @@ settings in several different ways: $ export TC_UPSAMPLING_METHOD=cubic - will set the corresponding setting - ``UPSAMPLING_METHOD`` to ``cubic`` in all Terracotta instances. 
This is particularly useful - for serverless deployments. You can set list values in JSON array notation: + will set the corresponding setting ``UPSAMPLING_METHOD`` to ``cubic`` in all Terracotta + instances. This is particularly useful for serverless deployments. You can set list + values in JSON array notation: .. code-block:: bash @@ -60,4 +60,5 @@ All runtime settings are contained in the following :class:`~typing.NamedTuple`. `have a look at the TerracottaSettings source code <_modules/terracotta/config.html#TerracottaSettings>`_. .. autoclass:: terracotta.config.TerracottaSettings - :members: \ No newline at end of file + :members: + :member-order: bysource \ No newline at end of file diff --git a/docs/tutorials/windows.rst b/docs/tutorials/windows.rst index 6d0ce4ad..19b93231 100644 --- a/docs/tutorials/windows.rst +++ b/docs/tutorials/windows.rst @@ -1,3 +1,160 @@ Setting up a Terracotta environment on Windows 10 ================================================= +Terracotta comes with full support for Windows 10, even though setup +might be more complicated compared to Unix systems. + + +Set up conda and install Terracotta +----------------------------------- + +1. `Download and install Git for Windows `__. + +2. `Download and install Miniconda `__. + If you do not give the installer permissions to append ``conda`` to your PATH, + you will have to use the Anaconda shell for the following steps. + +3. Clone the Terracotta repository to your hard drive by running + + .. code-block:: bash + + $ git clone https://github.com/DHI-GRAS/terracotta.git + + Alternatively, + `you can choose and download a release version `__. + +4. Go to the Terracotta folder, and run + + .. code-block:: bash + + $ conda env create -f environment.yml + + If the command finished without errors, you have successfully installed + Terracotta. + +5. Before using Terracotta, activate the environment via + + .. 
code-block:: bash + + $ conda activate terracotta + + You can now use the :doc:`Terracotta CLI <../cli>`: + + .. code-block:: bash + + $ terracotta --help + + +Optional: Configure AWS credentials +----------------------------------- + +Terracotta unfolds its full potential when used with cloud services. All drivers +support raster files located on AWS S3, and databases on S3 (through the +:class:`~terracotta.drivers.sqlite_remote.RemoteSQLiteDriver`) or RDS (through the +:class:`~terracotta.drivers.mysql.MySQLDriver`). To use these features, you need +to create an account and authenticate with it. + +1. If you do not have an account on AWS yet, + `just head over and create one `__. + +2. You will need to create an IAM user that has programmatic access to your account. + For that purpose, `go to the IAM service `__ + and create a new IAM user. + + In the easiest setup, you can give it full permission to your account + (but make sure to keep the key secret). For that, enter a username (such as + ``awscli``), check the box ``Programmatic access``, and attach the + ``AdministratorAccess`` policy. + +3. After you have created the IAM user, AWS will show you the corresponding ID and + access key. Save those for later. + +4. Install the AWS command line tools by executing + + .. code-block:: bash + + $ conda activate terracotta + $ pip install awscli + + You can now use the AWS CLI: + + .. code-block:: bash + + $ aws --help + +5. Configure the credentials to use with the AWS CLI: + + .. code-block:: bash + + $ aws configure + + When asked for it, paste the ID and key of the IAM user you created in step 2. + You will also have to choose a default AWS region, e.g. ``eu-central-1``. + +6. You should now be able to use your AWS account programmatically. You can try this via + + .. code-block:: bash + + $ aws s3 ls + + You should now see a list of your S3 buckets if you have created any. 
+ +By configuring the AWS credentials through the AWS CLI, Terracotta is now able to access +all of your resources on AWS. + + +Optional: Set up Zappa on WSL +----------------------------- + +We rely on the magic provided by `Zappa `__ to deploy +Terracotta on AWS λ. Since AWS λ workers run on Linux, we cannot use a Windows environment +for deployment. This is why we rely on the Windows subsystem for Linux (WSL). + +1. First up, `install the Windows subsystem for Linux `__. + You can install any Linux flavor you want, but in this tutorial we are using Ubuntu. + +2. This and all further steps should be executed in a WSL shell. We will have to re-install + Terracotta and its dependencies inside Linux. + + We will start by installing Python 3.6 and some libraries: + + .. code-block:: bash + + $ sudo add-apt-repository ppa:deadsnakes/ppa + $ sudo apt update + $ sudo apt install build-essential gdal-bin git libgdal-dev python3.6-dev + +3. Create a new virtual Python environment that we will use to deploy Terracotta: + + .. code-block:: bash + + $ pip install virtualenv --user + $ virtualenv --python=python3.6 ~/envs/tc-deploy + + Activate the new environment by running + + .. code-block:: bash + + $ source ~/envs/tc-deploy/bin/activate + +4. Clone Terracotta inside Linux: + + .. code-block:: bash + + $ git clone https://github.com/DHI-GRAS/terracotta.git + +5. Switch to the Terracotta folder and install the Zappa requirements and Terracotta: + + .. code-block:: bash + + $ pip install -r zappa_requirements.txt + $ pip install -e . + +6. Install and configure the AWS CLI: + + .. code-block:: bash + + $ pip install awscli + $ aws configure + +And you're done! You should now be able to :doc:`deploy Terracotta on AWS λ `. 
diff --git a/terracotta/drivers/__init__.py b/terracotta/drivers/__init__.py index e64e4960..f7afa9bb 100644 --- a/terracotta/drivers/__init__.py +++ b/terracotta/drivers/__init__.py @@ -57,9 +57,9 @@ def get_driver(url_or_path: URLOrPathType, provider: str = None) -> Driver: Arguments: url_or_path: A path indentifying the database to connect to. - The expected format depends on the driver provider. + The expected format depends on the driver provider. provider: Driver provider to use (one of sqlite, sqlite-remote, mysql; - default: auto-detect). + default: auto-detect). Example: diff --git a/terracotta/drivers/base.py b/terracotta/drivers/base.py index 572883b6..505b7ba7 100644 --- a/terracotta/drivers/base.py +++ b/terracotta/drivers/base.py @@ -22,13 +22,14 @@ def inner(self: Driver, *args: Any, **kwargs: Any) -> T: class Driver(ABC): - """Abstract base class for all data backends. + """Abstract base class for all Terracotta data backends. - Defines a common interface for all handlers. + Defines a common interface for all drivers. """ - RESERVED_KEYS = ('limit', 'page') + _RESERVED_KEYS = ('limit', 'page') - key_names: Tuple[str] + db_version: str #: Terracotta version used to create the database + key_names: Tuple[str] #: Names of all keys defined by the database @abstractmethod def __init__(self, url_or_path: str) -> None: @@ -37,40 +38,73 @@ def __init__(self, url_or_path: str) -> None: @abstractmethod def create(self, keys: Sequence[str], *args: Any, key_descriptions: Mapping[str, str] = None, **kwargs: Any) -> None: - """Create a new, empty data storage""" + # Create a new, empty database (driver dependent) pass @abstractmethod def connect(self) -> contextlib.AbstractContextManager: - """Context manager to connect to a given database and clean up on exit.""" + """Context manager to connect to a given database and clean up on exit. 
+ + This allows you to pool interactions with the database to prevent possibly + expensive reconnects, or to roll back several interactions if one of them fails. + + Note: + + Make sure to call :meth:`create` on a fresh database before using this method. + + Example: + + >>> import terracotta as tc + >>> driver = tc.get_driver('tc.sqlite') + >>> with driver.connect(): + ... for keys, dataset in datasets.items(): + ... # connection will be kept open between insert operations + ... driver.insert(keys, dataset) + + """ pass @abstractmethod def get_keys(self) -> OrderedDict: - """Get all known keys and their fulltext descriptions.""" + """Get all known keys and their fulltext descriptions. + + Returns: + + An :class:`~collections.OrderedDict` in the form + ``{key_name: key_description}`` + + """ pass @abstractmethod def get_datasets(self, where: Mapping[str, str] = None, limit: int = 500, page: int = 1) -> Dict[Tuple[str, ...], Any]: - """Get all known dataset key combinations matching the given pattern (all if not given). - - Return dict values are a handle to retrieve data (e.g. file path or callback). - """ + # Get all known dataset key combinations matching the given constraints, + # and a handle to retrieve the data (driver dependent) pass @abstractmethod def get_metadata(self, keys: Union[Sequence[str], Mapping[str, str]]) -> Dict[str, Any]: """Return all stored metadata for given keys. - Metadata has to contain the following keys: - - range: global minimum and maximum value in dataset - - bounds: physical bounds covered by dataset - - convex_hull: GeoJSON shape specifying total data coverage - - percentiles: array of pre-computed percentiles in range(1, 100) - - mean: global mean - - stdev: global standard deviation - - metadata: any additional client-relevant metadata + Arguments: + + keys: Keys of the requested dataset. Can either be given as a sequence of key values, + or as a mapping ``{key_name: key_value}``. 
+ + Returns: + + A :class:`dict` with the values + + - ``range``: global minimum and maximum value in dataset + - ``bounds``: physical bounds covered by dataset in latitude-longitude projection + - ``convex_hull``: GeoJSON shape specifying total data coverage in latitude-longitude + projection + - ``percentiles``: array of pre-computed percentiles from 1% through 99% + - ``mean``: global mean + - ``stdev``: global standard deviation + - ``metadata``: any additional client-relevant metadata + """ pass @@ -81,30 +115,62 @@ def get_raster_tile(self, keys: Union[Sequence[str], Mapping[str, str]], *, tile_size: Sequence[int] = (256, 256), preserve_values: bool = False, asynchronous: bool = False) -> Any: - """Get raster tile as a NumPy array for given keys and bounds. + """Load a raster tile with given keys and bounds. + + Arguments: + + keys: Keys of the requested dataset. Can either be given as a sequence of key values, + or as a mapping ``{key_name: key_value}``. + bounds: Physical bounds of the tile to read, in Web Mercator projection (EPSG3857). + Reads the whole dataset if not given. + tile_size: Shape of the output array to return. Must be two-dimensional. + Defaults to :attr:`~terracotta.config.TerracottaSettings.DEFAULT_TILE_SIZE`. + preserve_values: Whether to preserve exact numerical values (e.g. when reading + categorical data). Sets all interpolation to nearest neighbor. + asynchronous: If ``True``, the tile will be read asynchronously in a separate thread. + This function will return immediately with a :class:`~concurrent.futures.Future` + that can be used to retrieve the result. + + Returns: + + Requested tile as :class:`~numpy.ma.MaskedArray` of shape ``tile_size`` if + ``asynchronous=False``, otherwise a :class:`~concurrent.futures.Future` containing + the result. - If asynchronous=True, returns a Future containing the result instead. 
""" pass @staticmethod @abstractmethod def compute_metadata(data: Any, *, - extra_metadata: Any = None) -> Dict[str, Any]: - """Compute metadata for a given input file.""" + extra_metadata: Any = None, + **kwargs: Any) -> Dict[str, Any]: + # Compute metadata for a given input file (driver dependent) pass @abstractmethod - def insert(self, *args: Any, - metadata: Mapping[str, Any] = None, - skip_metadata: bool = False, - **kwargs: Any) -> None: - """Register a new dataset. Used to populate data storage.""" + def insert(self, keys: Union[Sequence[str], Mapping[str, str]], + *args: Any, **kwargs: Any) -> None: + """Register a new dataset. Used to populate metadata database. + + Arguments: + + keys: Keys of the dataset. Can either be given as a sequence of key values, or + as a mapping ``{key_name: key_value}``. + + """ pass @abstractmethod def delete(self, keys: Union[Sequence[str], Mapping[str, str]]) -> None: - """Remove a dataset from metadata storage.""" + """Remove a dataset from the metadata database. + + Arguments: + + keys: Keys of the dataset. Can either be given as a sequence of key values, or + as a mapping ``{key_name: key_value}``. 
+ + """ pass def __repr__(self) -> str: diff --git a/terracotta/drivers/mysql.py b/terracotta/drivers/mysql.py index 82a66b38..8a3a604a 100644 --- a/terracotta/drivers/mysql.py +++ b/terracotta/drivers/mysql.py @@ -8,16 +8,16 @@ Mapping, Any, Optional, cast, TypeVar, NamedTuple) from collections import OrderedDict import contextlib +from contextlib import ContextDecorator import re import json import urllib.parse as urlparse from urllib.parse import ParseResult -from pathlib import Path import numpy as np import pymysql from pymysql.connections import Connection -from pymysql.cursors import DictCursor # noqa: F401 +from pymysql.cursors import DictCursor from terracotta import get_settings, __version__ from terracotta.drivers.raster_base import RasterDriver @@ -29,7 +29,7 @@ T = TypeVar('T') _ERROR_ON_CONNECT = ( - 'Could not retrieve version from database. Make sure that the given path points ' + 'Could not connect to database. Make sure that the given path points ' 'to a valid Terracotta database, and that you ran driver.create().' ) @@ -53,18 +53,24 @@ class MySQLCredentials(NamedTuple): class MySQLDriver(RasterDriver): - """MySQL-backed raster driver. + """A MySQL-backed raster driver. + + Assumes raster data to be present in separate GDAL-readable files on disk or remotely. + Stores metadata and paths to raster files in MySQL. + + Requires a running MySQL server. The MySQL database consists of 4 different tables: - - `terracotta`: Metadata about the database itself. - - `keys`: Contains a single column holding all available keys. - - `datasets`: Maps indices to raster file path. - - `metadata`: Contains actual metadata as separate columns. Indexed via keys. + - ``terracotta``: Metadata about the database itself. + - ``key_names``: Contains two columns holding all available keys and their description. + - ``datasets``: Maps key values to physical raster path. + - ``metadata``: Contains actual metadata as separate columns. Indexed via key values. 
+ This driver caches raster data and key names, but not metadata. """ - MAX_PRIMARY_KEY_LENGTH = 767 // 4 # Max key length for MySQL is at least 767B - METADATA_COLUMNS: Tuple[Tuple[str, ...], ...] = ( + _MAX_PRIMARY_KEY_LENGTH = 767 // 4 # Max key length for MySQL is at least 767B + _METADATA_COLUMNS: Tuple[Tuple[str, ...], ...] = ( ('bounds_north', 'REAL'), ('bounds_east', 'REAL'), ('bounds_south', 'REAL'), @@ -78,17 +84,28 @@ class MySQLDriver(RasterDriver): ('percentiles', 'BLOB'), ('metadata', 'LONGTEXT') ) - CHARSET: str = 'utf8mb4' + _CHARSET: str = 'utf8mb4' + + def __init__(self, mysql_path: str) -> None: + """Initialize the MySQLDriver. + + This should not be called directly, use :func:`~terracotta.get_driver` instead. + + Arguments: + + mysql_path: URL to running MySQL server, in the form + ``mysql://username:password@hostname/database`` + + """ - def __init__(self, path: Union[str, Path]) -> None: settings = get_settings() self.DB_CONNECTION_TIMEOUT: int = settings.DB_CONNECTION_TIMEOUT - con_params = urlparse.urlparse(str(path)) + con_params = urlparse.urlparse(mysql_path) if not con_params.hostname: - con_params = urlparse.urlparse(f'mysql://{path}') + con_params = urlparse.urlparse(f'mysql://{mysql_path}') if con_params.scheme != 'mysql': raise ValueError(f'unsupported URL scheme "{con_params.scheme}"') @@ -137,7 +154,7 @@ def _parse_db_name(con_params: ParseResult) -> str: @requires_connection @convert_exceptions(_ERROR_ON_CONNECT) def _get_db_version(self) -> str: - """Getter for db_version""" + """Terracotta version used to create the database""" cursor = self._cursor cursor.execute('SELECT version from terracotta') db_row = cast(Dict[str, str], cursor.fetchone()) @@ -162,17 +179,20 @@ def versiontuple(version_string: str) -> Sequence[str]: self._version_checked = True def _get_key_names(self) -> Tuple[str, ...]: - """Getter for key_names""" + """Names of all keys defined by the database""" return tuple(self.get_keys().keys()) key_names = 
cast(Tuple[str], property(_get_key_names)) + def connect(self) -> ContextDecorator: + return self._connect(check=True) + @contextlib.contextmanager - def connect(self, check: bool = True) -> Iterator: + def _connect(self, check: bool = True) -> Iterator: close = False try: if not self._connected: - with convert_exceptions('Unable to connect to database'): + with convert_exceptions(_ERROR_ON_CONNECT): self._connection = pymysql.connect( host=self._db_args.host, user=self._db_args.user, db=self._db_args.db, password=self._db_args.password, port=self._db_args.port, @@ -191,6 +211,7 @@ def connect(self, check: bool = True) -> Iterator: except Exception: self._connection.rollback() raise + finally: if close: self._cursor.close() @@ -200,9 +221,17 @@ def connect(self, check: bool = True) -> Iterator: @convert_exceptions('Could not create database') def create(self, keys: Sequence[str], key_descriptions: Mapping[str, str] = None) -> None: - """Initialize database with empty tables. + """Create and initialize database with empty tables. + + This must be called before opening the first connection. The MySQL database must not + exist already. + + Arguments: + + keys: Key names to use throughout the Terracotta database. + key_descriptions: Optional (but recommended) full-text description for some keys, + in the form of ``{key_name: description}``. - This must be called before opening the first connection. 
""" if key_descriptions is None: key_descriptions = {} @@ -215,15 +244,15 @@ def create(self, keys: Sequence[str], key_descriptions: Mapping[str, str] = None if not all(re.match(r'^\w+$', key) for key in keys): raise exceptions.InvalidKeyError('key names must be alphanumeric') - if any(key in self.RESERVED_KEYS for key in keys): - raise exceptions.InvalidKeyError(f'key names cannot be one of {self.RESERVED_KEYS!s}') + if any(key in self._RESERVED_KEYS for key in keys): + raise exceptions.InvalidKeyError(f'key names cannot be one of {self._RESERVED_KEYS!s}') for key in keys: if key not in key_descriptions: key_descriptions[key] = '' # total primary key length has an upper limit in MySQL - key_size = self.MAX_PRIMARY_KEY_LENGTH // len(keys) + key_size = self._MAX_PRIMARY_KEY_LENGTH // len(keys) key_type = f'VARCHAR({key_size})' with pymysql.connect(host=self._db_args.host, user=self._db_args.user, @@ -233,34 +262,30 @@ def create(self, keys: Sequence[str], key_descriptions: Mapping[str, str] = None binary_prefix=True, charset='utf8mb4') as con: con.execute(f'CREATE DATABASE {self._db_args.db}') - with self.connect(check=False): + with self._connect(check=False): cursor = self._cursor cursor.execute(f'CREATE TABLE terracotta (version VARCHAR(255)) ' - f'CHARACTER SET {self.CHARSET}') + f'CHARACTER SET {self._CHARSET}') cursor.execute('INSERT INTO terracotta VALUES (%s)', [str(__version__)]) cursor.execute(f'CREATE TABLE key_names (key_name {key_type}, ' - f'description VARCHAR(8000)) CHARACTER SET {self.CHARSET}') + f'description VARCHAR(8000)) CHARACTER SET {self._CHARSET}') key_rows = [(key, key_descriptions[key]) for key in keys] cursor.executemany('INSERT INTO key_names VALUES (%s, %s)', key_rows) key_string = ', '.join([f'{key} {key_type}' for key in keys]) cursor.execute(f'CREATE TABLE datasets ({key_string}, filepath VARCHAR(8000), ' - f'PRIMARY KEY({", ".join(keys)})) CHARACTER SET {self.CHARSET}') + f'PRIMARY KEY({", ".join(keys)})) CHARACTER SET 
{self._CHARSET}') column_string = ', '.join(f'{col} {col_type}' for col, col_type - in self.METADATA_COLUMNS) + in self._METADATA_COLUMNS) cursor.execute(f'CREATE TABLE metadata ({key_string}, {column_string}, ' - f'PRIMARY KEY ({", ".join(keys)})) CHARACTER SET {self.CHARSET}') + f'PRIMARY KEY ({", ".join(keys)})) CHARACTER SET {self._CHARSET}') # invalidate key cache self._db_keys = None def get_keys(self) -> OrderedDict: - """Retrieve key names and descriptions from database. - - Caches keys after first call. - """ if self._db_keys is None: self._db_keys = self._get_keys() return self._db_keys @@ -282,7 +307,6 @@ def _get_keys(self) -> OrderedDict: @convert_exceptions('Could not retrieve datasets') def get_datasets(self, where: Mapping[str, str] = None, page: int = 0, limit: int = None) -> Dict[Tuple[str, ...], str]: - """Retrieve keys of datasets matching given pattern""" cursor = self._cursor if limit is not None: @@ -349,7 +373,6 @@ def _decode_data(encoded: Mapping[str, Any]) -> Dict[str, Any]: @requires_connection @convert_exceptions('Could not retrieve metadata') def get_metadata(self, keys: Union[Sequence[str], Mapping[str, str]]) -> Dict[str, Any]: - """Retrieve metadata for given keys""" keys = tuple(self._key_dict_to_sequence(keys)) if len(keys) != len(self.key_names): @@ -374,7 +397,7 @@ def get_metadata(self, keys: Union[Sequence[str], Mapping[str, str]]) -> Dict[st assert row - data_columns, _ = zip(*self.METADATA_COLUMNS) + data_columns, _ = zip(*self._METADATA_COLUMNS) encoded_data = {col: row[col] for col in self.key_names + data_columns} return self._decode_data(encoded_data) @@ -387,7 +410,6 @@ def insert(self, metadata: Mapping[str, Any] = None, skip_metadata: bool = False, override_path: str = None) -> None: - """Insert a dataset into the database""" cursor = self._cursor if len(keys) != len(self.key_names): @@ -418,7 +440,6 @@ def insert(self, @requires_connection @convert_exceptions('Could not write to database') def delete(self, keys: 
Union[Sequence[str], Mapping[str, str]]) -> None: - """Delete a dataset from the database""" cursor = self._cursor if len(keys) != len(self.key_names): diff --git a/terracotta/drivers/raster_base.py b/terracotta/drivers/raster_base.py index cd131f5f..c68be987 100644 --- a/terracotta/drivers/raster_base.py +++ b/terracotta/drivers/raster_base.py @@ -15,7 +15,7 @@ import warnings import numpy as np -from cachetools import cachedmethod, LRUCache +from cachetools import cachedmethod, LFUCache from affine import Affine if TYPE_CHECKING: # pragma: no cover @@ -41,21 +41,86 @@ class RasterDriver(Driver): get_datasets has to return path to raster file as sole dict value. """ - TARGET_CRS: str = 'epsg:3857' - LARGE_RASTER_THRESHOLD: int = 10980 * 10980 - RIO_ENV_KEYS = dict(GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR', GDAL_TIFF_INTERNAL_MASK=True) + _TARGET_CRS: str = 'epsg:3857' + _LARGE_RASTER_THRESHOLD: int = 10980 * 10980 + _RIO_ENV_KEYS = dict(GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR', GDAL_TIFF_INTERNAL_MASK=True) @abstractmethod def __init__(self, *args: Any, **kwargs: Any) -> None: settings = get_settings() - self._raster_cache = LRUCache( + self._raster_cache = LFUCache( settings.RASTER_CACHE_SIZE, getsizeof=operator.attrgetter('nbytes') ) self._executor = concurrent.futures.ThreadPoolExecutor(max_workers=3) super().__init__(*args, **kwargs) + # specify signature and docstring for insert + @abstractmethod + def insert(self, + keys: Union[Sequence[str], Mapping[str, str]], + filepath: str, *, + metadata: Mapping[str, Any] = None, + skip_metadata: bool = False, + override_path: str = None) -> None: + """Insert a raster file into the database. + + Arguments: + + keys: Keys identifying the new dataset. Can either be given as a sequence of key + values, or as a mapping ``{key_name: key_value}``. + filepath: Path to the GDAL-readable raster file. + metadata: If not given (default), call :meth:`compute_metadata` with default arguments + to compute raster metadata. 
Otherwise, use the given values. This can be used to + decouple metadata computation from insertion, or to use the optional arguments + of :meth:`compute_metadata`. + skip_metadata: Do not compute any raster metadata (will be computed during the first + request instead). Use sparingly; this option has a detrimental effect on the end + user experience and might lead to surprising results. Has no effect if ``metadata`` + is given. + override_path: Override the path to the raster file in the database. Use this option if + you intend to copy the data somewhere else after insertion (e.g. when moving files + to a cloud storage later on). + + """ + pass + + # specify signature and docstring for get_datasets + @abstractmethod + def get_datasets(self, where: Mapping[str, str] = None, + page: int = 0, limit: int = None) -> Dict[Tuple[str, ...], str]: + """Retrieve keys and file paths of datasets. + + Arguments: + + where: Constraints on returned datasets in the form ``{key_name: allowed_key_value}``. + Returns all datasets if not given (default). + page: Current page of results. Has no effect if ``limit`` is not given. + limit: If given, return at most this many datasets. Unlimited by default. 
+ + + Returns: + + :class:`dict` containing + ``{(key_value1, key_value2, ...): raster_file_path}`` + + Example: + + >>> import terracotta as tc + >>> driver = tc.get_driver('tc.sqlite') + >>> driver.get_datasets() + { + ('reflectance', '20180101', 'B04'): 'reflectance_20180101_B04.tif', + ('reflectance', '20180102', 'B04'): 'reflectance_20180102_B04.tif', + } + >>> driver.get_datasets({'date': '20180101'}) + {('reflectance', '20180101', 'B04'): 'reflectance_20180101_B04.tif'} + + """ + pass + def _key_dict_to_sequence(self, keys: Union[Mapping[str, Any], Sequence[Any]]) -> List[Any]: + """Convert {key_name: key_value} to [key_value] with the correct key order.""" try: keys_as_mapping = cast(Mapping[str, Any], keys) return [keys_as_mapping[key] for key in self.key_names] @@ -97,7 +162,7 @@ def _hull_candidate_mask(mask: np.ndarray) -> np.ndarray: @staticmethod def _compute_image_stats_chunked(dataset: 'DatasetReader') -> Optional[Dict[str, Any]]: - """Loop over chunks and accumulate statistics""" + """Compute statistics for the given rasterio dataset by looping over chunks.""" from rasterio import features, warp, windows from shapely import geometry @@ -155,6 +220,7 @@ def _compute_image_stats_chunked(dataset: 'DatasetReader') -> Optional[Dict[str, @staticmethod def _compute_image_stats(dataset: 'DatasetReader', max_shape: Sequence[int] = None) -> Optional[Dict[str, Any]]: + """Compute statistics for the given rasterio dataset by reading it into memory.""" from rasterio import features, warp, transform from shapely import geometry @@ -214,7 +280,21 @@ def compute_metadata(cls, raster_path: str, *, max_shape: Sequence[int] = None) -> Dict[str, Any]: """Read given raster file and compute metadata from it. - This handles most of the heavy lifting during raster ingestion. + This handles most of the heavy lifting during raster ingestion. The returned metadata can + be passed directly to :meth:`insert`. 
+ + Arguments: + + raster_path: Path to GDAL-readable raster file + extra_metadata: Any additional metadata to attach to the dataset. Will be + JSON-serialized and returned verbatim by :meth:`get_metadata`. + use_chunks: Whether to process the image in chunks (slower, but uses less memory). + If not given, use chunks for large images only. + max_shape: Gives the maximum number of pixels used in each dimension to compute + metadata. Setting this to a relatively small size such as ``(1024, 1024)`` will + result in much faster metadata computation for large images, at the expense of + inaccurate results. + """ import rasterio from rasterio import warp @@ -229,7 +309,7 @@ def compute_metadata(cls, raster_path: str, *, if use_chunks and max_shape is not None: raise ValueError('Cannot use both use_chunks and max_shape arguments') - with rasterio.Env(**cls.RIO_ENV_KEYS): + with rasterio.Env(**cls._RIO_ENV_KEYS): if not validate(raster_path): warnings.warn( f'Raster file {raster_path} is not a valid cloud-optimized GeoTIFF. 
' @@ -250,12 +330,12 @@ def compute_metadata(cls, raster_path: str, *, ) if use_chunks is None and max_shape is None: - use_chunks = src.width * src.height > RasterDriver.LARGE_RASTER_THRESHOLD + use_chunks = src.width * src.height > RasterDriver._LARGE_RASTER_THRESHOLD if use_chunks: logger.debug( f'Computing metadata for file {raster_path} using more than ' - f'{RasterDriver.LARGE_RASTER_THRESHOLD // 10**6}M pixels, iterating ' + f'{RasterDriver._LARGE_RASTER_THRESHOLD // 10**6}M pixels, iterating ' 'over chunks' ) @@ -302,7 +382,7 @@ def _get_resampling_enum(method: str) -> Any: @staticmethod @trace('calculate_default_transform') def _calculate_default_transform(src_crs: Union[Dict[str, str], str], - target_crs: Union[Dict[str, str], str], + _TARGET_CRS: Union[Dict[str, str], str], width: int, height: int, *bounds: Number) -> Tuple[Affine, int, int]: @@ -321,7 +401,7 @@ def _calculate_default_transform(src_crs: Union[Dict[str, str], str], # transform image corners to target CRS dst_corner_sw, dst_corner_nw, dst_corner_se, dst_corner_ne = ( list(zip(*warp.transform( - src_crs, target_crs, + src_crs, _TARGET_CRS, [bounds[0], bounds[0], bounds[2], bounds[2]], [bounds[1], bounds[3], bounds[1], bounds[3]] ))) @@ -340,7 +420,7 @@ def _calculate_default_transform(src_crs: Union[Dict[str, str], str], target_res = (dst_corner_transform.a, dst_corner_transform.e) # get transform spanning whole bounds (not just projected corners) - dst_bounds = warp.transform_bounds(src_crs, target_crs, *bounds) + dst_bounds = warp.transform_bounds(src_crs, _TARGET_CRS, *bounds) dst_width = math.ceil((dst_bounds[2] - dst_bounds[0]) / target_res[0]) dst_height = math.ceil((dst_bounds[1] - dst_bounds[3]) / target_res[1]) dst_transform = transform.from_bounds(*dst_bounds, width=dst_width, height=dst_height) @@ -378,7 +458,7 @@ def _get_raster_tile(self, path: str, *, downsampling_enum = self._get_resampling_enum(downsampling_method) with contextlib.ExitStack() as es: - 
es.enter_context(rasterio.Env(**self.RIO_ENV_KEYS)) + es.enter_context(rasterio.Env(**self._RIO_ENV_KEYS)) try: with trace('open_dataset'): src = es.enter_context(rasterio.open(path)) @@ -387,10 +467,10 @@ def _get_raster_tile(self, path: str, *, # compute suggested resolution and bounds in target CRS dst_transform, _, _ = self._calculate_default_transform( - src.crs, self.TARGET_CRS, src.width, src.height, *src.bounds + src.crs, self._TARGET_CRS, src.width, src.height, *src.bounds ) dst_res = (abs(dst_transform.a), abs(dst_transform.e)) - dst_bounds = warp.transform_bounds(src.crs, self.TARGET_CRS, *src.bounds) + dst_bounds = warp.transform_bounds(src.crs, self._TARGET_CRS, *src.bounds) if bounds is None: bounds = dst_bounds @@ -415,7 +495,7 @@ def _get_raster_tile(self, path: str, *, # construct VRT vrt = es.enter_context( WarpedVRT( - src, crs=self.TARGET_CRS, resampling=upsampling_enum, add_alpha=True, + src, crs=self._TARGET_CRS, resampling=upsampling_enum, add_alpha=True, transform=vrt_transform, width=vrt_width, height=vrt_height ) ) @@ -456,16 +536,18 @@ def _get_raster_tile(self, path: str, *, def get_raster_tile(self, keys: Union[Sequence[str], Mapping[str, str]], *, bounds: Sequence[float] = None, - tile_size: Sequence[int] = (256, 256), + tile_size: Sequence[int] = None, preserve_values: bool = False, asynchronous: bool = False) -> Any: - """Load tile with given keys and bounds""" settings = get_settings() key_tuple = tuple(self._key_dict_to_sequence(keys)) path = self.get_datasets(dict(zip(self.key_names, key_tuple))) assert len(path) == 1 path = path[key_tuple] + if tile_size is None: + tile_size = settings.DEFAULT_TILE_SIZE + # make sure all arguments are hashable task = functools.partial( self._get_raster_tile, @@ -476,7 +558,6 @@ def get_raster_tile(self, upsampling_method=settings.UPSAMPLING_METHOD, downsampling_method=settings.DOWNSAMPLING_METHOD ) - print(self._raster_cache.currsize) if asynchronous: return self._executor.submit(task) diff --git 
a/terracotta/drivers/sqlite.py b/terracotta/drivers/sqlite.py index f566b133..52da07c2 100644 --- a/terracotta/drivers/sqlite.py +++ b/terracotta/drivers/sqlite.py @@ -7,6 +7,7 @@ from typing import Any, Sequence, Mapping, Tuple, Union, Iterator, Dict, cast import os import contextlib +from contextlib import ContextDecorator import json import re import sqlite3 @@ -22,7 +23,7 @@ from terracotta.drivers.raster_base import RasterDriver _ERROR_ON_CONNECT = ( - 'Could not retrieve version from database. Make sure that the given path points ' + 'Could not connect to database. Make sure that the given path points ' 'to a valid Terracotta database, and that you ran driver.create().' ) @@ -37,25 +38,38 @@ def convert_exceptions(msg: str) -> Iterator: class SQLiteDriver(RasterDriver): - """SQLite-backed raster driver. + """An SQLite-backed raster driver. + + Assumes raster data to be present in separate GDAL-readable files on disk or remotely. + Stores metadata and paths to raster files in SQLite. + + This is the simplest Terracotta driver, as it requires no additional infrastructure. + The SQLite database is simply a file that can be stored together with the actual + raster files. Note: - This driver is not thread-safe. It is not possible to connect to the database - outside the main thread. + This driver requires the SQLite database to be physically present on the server. + For remote SQLite databases hosted on S3, use + :class:`~terracotta.drivers.sqlite_remote.RemoteSQLiteDriver`. The SQLite database consists of 4 different tables: - - `terracotta`: Metadata about the database itself. - - `keys`: Contains a single column holding all available keys. - - `datasets`: Maps indices to raster file path. - - `metadata`: Contains actual metadata as separate columns. Indexed via keys. + - ``terracotta``: Metadata about the database itself. + - ``keys``: Contains two columns holding all available keys and their description. 
+ - ``datasets``: Maps key values to physical raster path. + - ``metadata``: Contains actual metadata as separate columns. Indexed via key values. + + This driver caches raster data, but not metadata. - This driver caches raster data in RasterDriver. + Warning: + + This driver is not thread-safe. It is not possible to connect to the database + outside the main thread. """ - KEY_TYPE: str = 'VARCHAR[256]' - METADATA_COLUMNS: Tuple[Tuple[str, ...], ...] = ( + _KEY_TYPE: str = 'VARCHAR[256]' + _METADATA_COLUMNS: Tuple[Tuple[str, ...], ...] = ( ('bounds_north', 'REAL'), ('bounds_east', 'REAL'), ('bounds_south', 'REAL'), @@ -71,7 +85,15 @@ class SQLiteDriver(RasterDriver): ) def __init__(self, path: Union[str, Path]) -> None: - """Use given database path to read and store metadata.""" + """Initialize the SQLiteDriver. + + This should not be called directly, use :func:`~terracotta.get_driver` instead. + + Arguments: + + path: File path to target SQLite database (may or may not exist yet) + + """ path = str(path) settings = get_settings() @@ -83,17 +105,15 @@ def __init__(self, path: Union[str, Path]) -> None: super().__init__(os.path.realpath(path)) + def connect(self) -> ContextDecorator: + return self._connect(check=True) + @contextlib.contextmanager - def connect(self, check: bool = True) -> Iterator: + def _connect(self, check: bool = True) -> Iterator: try: close = False if not self._connected: - # if check and not os.path.isfile(self.path): - # raise exceptions.InvalidDatabaseError( - # f'Database file {self.path} does not exist ' - # f'(run driver.create() before connecting to a new database)' - # ) - with convert_exceptions('Unable to connect to database'): + with convert_exceptions(_ERROR_ON_CONNECT): self._connection = sqlite3.connect( self.path, timeout=self.DB_CONNECTION_TIMEOUT ) @@ -118,7 +138,7 @@ def connect(self, check: bool = True) -> Iterator: @requires_connection @convert_exceptions(_ERROR_ON_CONNECT) def _get_db_version(self) -> str: - """Getter for 
db_version""" + """Terracotta version used to create the database""" conn = self._connection db_row = conn.execute('SELECT version from terracotta').fetchone() return db_row['version'] @@ -141,16 +161,23 @@ def versiontuple(version_string: str) -> Sequence[str]: ) def _get_key_names(self) -> Tuple[str, ...]: - """Getter for key_names""" + """Names of all keys defined by the database""" return tuple(self.get_keys().keys()) key_names = cast(Tuple[str], property(_get_key_names)) @convert_exceptions('Could not create database') def create(self, keys: Sequence[str], key_descriptions: Mapping[str, str] = None) -> None: - """Initialize database file with empty tables. + """Create and initialize database with empty tables. + + This must be called before opening the first connection. Tables must not exist already. + + Arguments: + + keys: Key names to use throughout the Terracotta database. + key_descriptions: Optional (but recommended) full-text description for some keys, + in the form of ``{key_name: description}``. - This must be called before opening the first connection. 
""" if key_descriptions is None: key_descriptions = {} @@ -163,35 +190,34 @@ def create(self, keys: Sequence[str], key_descriptions: Mapping[str, str] = None if not all(re.match(r'^\w+$', key) for key in keys): raise exceptions.InvalidKeyError('key names must be alphanumeric') - if any(key in self.RESERVED_KEYS for key in keys): - raise exceptions.InvalidKeyError(f'key names cannot be one of {self.RESERVED_KEYS!s}') + if any(key in self._RESERVED_KEYS for key in keys): + raise exceptions.InvalidKeyError(f'key names cannot be one of {self._RESERVED_KEYS!s}') for key in keys: if key not in key_descriptions: key_descriptions[key] = '' - with self.connect(check=False): + with self._connect(check=False): conn = self._connection conn.execute('CREATE TABLE terracotta (version VARCHAR[255])') conn.execute('INSERT INTO terracotta VALUES (?)', [str(__version__)]) - conn.execute(f'CREATE TABLE keys (key {self.KEY_TYPE}, description VARCHAR[max])') + conn.execute(f'CREATE TABLE keys (key {self._KEY_TYPE}, description VARCHAR[max])') key_rows = [(key, key_descriptions[key]) for key in keys] conn.executemany('INSERT INTO keys VALUES (?, ?)', key_rows) - key_string = ', '.join([f'{key} {self.KEY_TYPE}' for key in keys]) + key_string = ', '.join([f'{key} {self._KEY_TYPE}' for key in keys]) conn.execute(f'CREATE TABLE datasets ({key_string}, filepath VARCHAR[8000], ' f'PRIMARY KEY({", ".join(keys)}))') column_string = ', '.join(f'{col} {col_type}' for col, col_type - in self.METADATA_COLUMNS) + in self._METADATA_COLUMNS) conn.execute(f'CREATE TABLE metadata ({key_string}, {column_string}, ' f'PRIMARY KEY ({", ".join(keys)}))') @requires_connection @convert_exceptions('Could not retrieve keys from database') def get_keys(self) -> OrderedDict: - """Retrieve key names and descriptions from database""" conn = self._connection key_rows = conn.execute('SELECT * FROM keys') @@ -205,7 +231,6 @@ def get_keys(self) -> OrderedDict: @convert_exceptions('Could not retrieve datasets') def 
get_datasets(self, where: Mapping[str, str] = None, page: int = 0, limit: int = None) -> Dict[Tuple[str, ...], str]: - """Retrieve keys of datasets matching given pattern""" conn = self._connection if limit is not None: @@ -272,7 +297,6 @@ def _decode_data(encoded: Mapping[str, Any]) -> Dict[str, Any]: @requires_connection @convert_exceptions('Could not retrieve metadata') def get_metadata(self, keys: Union[Sequence[str], Mapping[str, str]]) -> Dict[str, Any]: - """Retrieve metadata for given keys""" keys = tuple(self._key_dict_to_sequence(keys)) if len(keys) != len(self.key_names): @@ -297,7 +321,7 @@ def get_metadata(self, keys: Union[Sequence[str], Mapping[str, str]]) -> Dict[st assert row - data_columns, _ = zip(*self.METADATA_COLUMNS) + data_columns, _ = zip(*self._METADATA_COLUMNS) encoded_data = {col: row[col] for col in self.key_names + data_columns} return self._decode_data(encoded_data) @@ -310,7 +334,6 @@ def insert(self, metadata: Mapping[str, Any] = None, skip_metadata: bool = False, override_path: str = None) -> None: - """Insert a dataset into the database""" conn = self._connection if len(keys) != len(self.key_names): @@ -340,7 +363,6 @@ def insert(self, @requires_connection @convert_exceptions('Could not write to database') def delete(self, keys: Union[Sequence[str], Mapping[str, str]]) -> None: - """Delete a dataset from the database""" conn = self._connection if len(keys) != len(self.key_names): diff --git a/terracotta/drivers/sqlite_remote.py b/terracotta/drivers/sqlite_remote.py index 542d11a1..636d40dc 100644 --- a/terracotta/drivers/sqlite_remote.py +++ b/terracotta/drivers/sqlite_remote.py @@ -4,7 +4,7 @@ to be present on disk. 
""" -from typing import Any, Union, Iterator +from typing import Any, Iterator import os import tempfile import shutil @@ -12,7 +12,6 @@ import logging import contextlib import urllib.parse as urlparse -from pathlib import Path from cachetools import cachedmethod, TTLCache @@ -54,9 +53,24 @@ def _update_from_s3(remote_path: str, local_path: str) -> None: class RemoteSQLiteDriver(SQLiteDriver): - """SQLite-backed raster driver, supports databases stored remotely in an S3 bucket. + """An SQLite-backed raster driver, where the database file is stored remotely on S3. - Note: + Assumes raster data to be present in separate GDAL-readable files on disk or remotely. + Stores metadata and paths to raster files in SQLite. + + See also: + + :class:`~terracotta.drivers.sqlite.SQLiteDriver` for the local version of this + driver. + + The SQLite database is simply a file that can be stored together with the actual + raster files on S3. Before handling the first request, this driver will download a + temporary copy of the remote database file. It is thus not feasible for large databases. + + The local database copy will be updated in regular intervals defined by + :attr:`~terracotta.config.TerracottaSettings.REMOTE_DB_CACHE_TTL`. + + Warning: This driver is read-only. Any attempts to use the create, insert, or delete methods will throw a NotImplementedError. @@ -64,8 +78,17 @@ class RemoteSQLiteDriver(SQLiteDriver): """ path: str - def __init__(self, path: Union[str, Path]) -> None: - """Use given database URL to read metadata.""" + def __init__(self, remote_path: str) -> None: + """Initialize the RemoteSQLiteDriver. + + This should not be called directly, use :func:`~terracotta.get_driver` instead. + + Arguments: + + remote_path: S3 URL in the form ``s3://bucket/key`` to remote SQLite database + (has to exist). 
+ + """ settings = get_settings() self.__rm = os.remove # keep reference to use in __del__ @@ -79,7 +102,7 @@ def __init__(self, path: Union[str, Path]) -> None: ) local_db_file.close() - self._remote_path: str = str(path) + self._remote_path: str = str(remote_path) self._checkdb_cache = TTLCache(maxsize=1, ttl=settings.REMOTE_DB_CACHE_TTL) super().__init__(local_db_file.name) diff --git a/terracotta/xyz.py b/terracotta/xyz.py index 4a80816b..644f9699 100644 --- a/terracotta/xyz.py +++ b/terracotta/xyz.py @@ -23,7 +23,8 @@ def get_tile_data(driver: Driver, if tile_xyz is None: # read whole dataset return driver.get_raster_tile( - keys, tile_size=tile_size, preserve_values=preserve_values, asynchronous=asynchronous + keys, tile_size=tile_size, preserve_values=preserve_values, + asynchronous=asynchronous ) # determine bounds for given tile From a9f5ac679307b87ea1370c0241786be95b012ab4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dion=20H=C3=A4fner?= Date: Thu, 29 Nov 2018 12:06:28 +0100 Subject: [PATCH 4/6] fix tests? 
--- .travis.yml | 1 + docs/tutorials/categorical.rst | 42 +++++++++++++++++--------- setup.cfg | 5 +-- terracotta/drivers/base.py | 5 +-- terracotta/drivers/mysql.py | 4 +-- terracotta/drivers/raster_base.py | 8 ++--- terracotta/drivers/sqlite.py | 4 +-- terracotta/scripts/optimize_rasters.py | 2 +- tests/drivers/test_drivers.py | 7 +---- 9 files changed, 43 insertions(+), 35 deletions(-) diff --git a/.travis.yml b/.travis.yml index 84ca8f24..b9622db0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,7 @@ language: python python: + - "3.7" - "3.6" env: diff --git a/docs/tutorials/categorical.rst b/docs/tutorials/categorical.rst index d1809b3f..724717b6 100644 --- a/docs/tutorials/categorical.rst +++ b/docs/tutorials/categorical.rst @@ -1,7 +1,7 @@ How to serve categorical data with Terracotta ============================================= -Categorical datasets are special in that the numerical pixel values +Categorical datasets are special, because the numerical pixel values carry no direct meaning, but rather encode which category or label the pixel belongs to. Because labels must be preserved, serving categorical data comes with its own set of complications: @@ -12,7 +12,7 @@ data comes with its own set of complications: Terracotta does not know categories and labels, but the API is flexible enough to give you the tools to build your own system and do the -interpretation in the frontend. Categorical data can be served by +interpretation in the frontend. You can serve categorical data by following these steps: During ingestion @@ -24,18 +24,19 @@ During ingestion ``[type, sensor, date, band]``, where ``type`` can take one of the values ``categorical``, ``index``, ``reflectance``, or whatever makes sense for your given application. + 2. Attach a mapping ``category name -> pixel value`` to the metadata of - your categorical dataset. Using the Python API, this could e.g. be - done like this: + your categorical dataset. 
Using the :doc:`Python API `, you + could do it like this: - .. code:: python + .. code-block:: python import terracotta as tc driver = tc.get_driver('terracotta.sqlite') - # assuming keys are [type, sensor, date, band] - keys = ['categorical', 'S2', '20181010', 'cloudmask'] + # assuming key names are [type, sensor, date, band] + key_values = ['categorical', 'S2', '20181010', 'cloudmask'] raster_path = 'cloud_mask.tif' category_map = { @@ -46,8 +47,12 @@ During ingestion } with driver.connect(): - metadata = driver.compute_metadata(raster_path, extra_metadata={'categories': category_map}) - driver.insert(keys, raster_path, metadata=metadata) + metadata = driver.compute_metadata( + raster_path, + extra_metadata={'categories': category_map} + ) + driver.insert(key_values, raster_path, metadata=metadata) + In the frontend --------------- @@ -62,7 +67,7 @@ can use the following functionality: ``example.com/metadata/categorical/S2/20181010/cloudmask``. The returned JSON object will contain a section like this: - .. code:: json + .. code-block:: json { "metadata": { @@ -77,15 +82,24 @@ can use the following functionality: - To get correctly labelled imagery, the frontend will have to pass an explicit color mapping of pixel values to colors by using - ``/singleband``\ ’s ``explicit_color_map`` argument. In our case, - this could look like this: - ``example.com/singleband/categorical/S2/20181010/cloudmask/{z}/{x}/{y}.png?colormap=explicit&explicit_color_map={"0": "99d594", "1": "2b83ba", "2": "ffffff", "3": "404040"}``. + ``/singleband``'s ``explicit_color_map`` argument. In our case, + this could look like this:: + + example.com/singleband/categorical/S2/20181010/cloudmask/ + {z}/{x}/{y}.png?colormap=explicit&explicit_color_map= + {"0": "99d594", "1": "2b83ba", "2": "ffffff", "3": "404040"} + + .. note:: + + Depending on your architecture, it might be required to encode all + special characters in the query, such as ``{``, ``}``, and ``:``. + This is e.g. 
the case when using AWS API Gateway / AWS λ. Supplying an explicit color map in this fashion suppresses stretching, and forces Terracotta to only use nearest neighbor resampling when reading the data. - Colors can be passed as hex strings (as in this example) or RGB color + Colors can be passed as hex strings (as in this example) or RGBA color tuples. In case you are looking for a nice color scheme for your categorical datasets, `color brewer `__ features some excellent suggestions. \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index f597007b..1f1461df 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,14 +17,11 @@ filterwarnings = error # ignored by default ignore::ImportWarning + ignore::DeprecationWarning ignore::PendingDeprecationWarning # raised by Cython, usually harmless ignore:numpy.dtype size changed:RuntimeWarning ignore:numpy.ufunc size changed:RuntimeWarning - # raised by Werkzeug - ignore:Request.is_xhr is deprecated:DeprecationWarning - # raised by Moto - ignore:Flags not at the start of the expression:DeprecationWarning flake8-ignore = terracotta/_version.py ALL diff --git a/terracotta/drivers/base.py b/terracotta/drivers/base.py index 505b7ba7..fa5ae7ed 100644 --- a/terracotta/drivers/base.py +++ b/terracotta/drivers/base.py @@ -78,7 +78,7 @@ def get_keys(self) -> OrderedDict: @abstractmethod def get_datasets(self, where: Mapping[str, str] = None, - limit: int = 500, page: int = 1) -> Dict[Tuple[str, ...], Any]: + page: int = 0, limit: int = None) -> Dict[Tuple[str, ...], Any]: # Get all known dataset key combinations matching the given constraints, # and a handle to retrieve the data (driver dependent) pass @@ -150,13 +150,14 @@ def compute_metadata(data: Any, *, @abstractmethod def insert(self, keys: Union[Sequence[str], Mapping[str, str]], - *args: Any, **kwargs: Any) -> None: + handle: Any, **kwargs: Any) -> None: """Register a new dataset. Used to populate metadata database. Arguments: keys: Keys of the dataset. 
Can either be given as a sequence of key values, or as a mapping ``{key_name: key_value}``. + handle: Handle to access dataset (driver dependent). """ pass diff --git a/terracotta/drivers/mysql.py b/terracotta/drivers/mysql.py index 8a3a604a..f6f29b5c 100644 --- a/terracotta/drivers/mysql.py +++ b/terracotta/drivers/mysql.py @@ -8,7 +8,7 @@ Mapping, Any, Optional, cast, TypeVar, NamedTuple) from collections import OrderedDict import contextlib -from contextlib import ContextDecorator +from contextlib import AbstractContextManager import re import json import urllib.parse as urlparse @@ -184,7 +184,7 @@ def _get_key_names(self) -> Tuple[str, ...]: key_names = cast(Tuple[str], property(_get_key_names)) - def connect(self) -> ContextDecorator: + def connect(self) -> AbstractContextManager: return self._connect(check=True) @contextlib.contextmanager diff --git a/terracotta/drivers/raster_base.py b/terracotta/drivers/raster_base.py index c68be987..0fdf5bad 100644 --- a/terracotta/drivers/raster_base.py +++ b/terracotta/drivers/raster_base.py @@ -57,7 +57,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: # specify signature and docstring for insert @abstractmethod - def insert(self, + def insert(self, # type: ignore keys: Union[Sequence[str], Mapping[str, str]], filepath: str, *, metadata: Mapping[str, Any] = None, @@ -541,9 +541,9 @@ def get_raster_tile(self, asynchronous: bool = False) -> Any: settings = get_settings() key_tuple = tuple(self._key_dict_to_sequence(keys)) - path = self.get_datasets(dict(zip(self.key_names, key_tuple))) - assert len(path) == 1 - path = path[key_tuple] + datasets = self.get_datasets(dict(zip(self.key_names, key_tuple))) + assert len(datasets) == 1 + path = datasets[key_tuple] if tile_size is None: tile_size = settings.DEFAULT_TILE_SIZE diff --git a/terracotta/drivers/sqlite.py b/terracotta/drivers/sqlite.py index 52da07c2..bbe40116 100644 --- a/terracotta/drivers/sqlite.py +++ b/terracotta/drivers/sqlite.py @@ -7,7 +7,7 @@ from 
typing import Any, Sequence, Mapping, Tuple, Union, Iterator, Dict, cast import os import contextlib -from contextlib import ContextDecorator +from contextlib import AbstractContextManager import json import re import sqlite3 @@ -105,7 +105,7 @@ def __init__(self, path: Union[str, Path]) -> None: super().__init__(os.path.realpath(path)) - def connect(self) -> ContextDecorator: + def connect(self) -> AbstractContextManager: return self._connect(check=True) @contextlib.contextmanager diff --git a/terracotta/scripts/optimize_rasters.py b/terracotta/scripts/optimize_rasters.py index 9a4ad6d9..c2b888a2 100644 --- a/terracotta/scripts/optimize_rasters.py +++ b/terracotta/scripts/optimize_rasters.py @@ -77,7 +77,7 @@ def _prefered_compression_method() -> str: def _get_vrt(src: DatasetReader, rs_method: int) -> WarpedVRT: from terracotta.drivers.raster_base import RasterDriver - target_crs = RasterDriver.TARGET_CRS + target_crs = RasterDriver._TARGET_CRS vrt_transform, vrt_width, vrt_height = RasterDriver._calculate_default_transform( src.crs, target_crs, src.width, src.height, *src.bounds ) diff --git a/tests/drivers/test_drivers.py b/tests/drivers/test_drivers.py index 45d63d03..c98d2c58 100644 --- a/tests/drivers/test_drivers.py +++ b/tests/drivers/test_drivers.py @@ -131,13 +131,8 @@ def test_version_conflict(driver_path, provider, raster_file, monkeypatch): m.setattr(f'{db.__module__}.__version__', fake_version) db._version_checked = False - # works - with db.connect(check=False): - pass - - # fails with pytest.raises(exceptions.InvalidDatabaseError) as exc: - with db.connect(check=True): + with db.connect(): pass assert fake_version in str(exc.value) From 3e3ab4f0e91f63454850b2ad9277bb954f7f3ee5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dion=20H=C3=A4fner?= Date: Thu, 29 Nov 2018 12:19:05 +0100 Subject: [PATCH 5/6] fix tests? 
--- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index b9622db0..55e3db30 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,7 @@ language: python +dist: xenial + python: - "3.7" - "3.6" From 6fb617ac2027b392484fed919e811a8f0a991751 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dion=20H=C3=A4fner?= Date: Thu, 29 Nov 2018 16:08:55 +0100 Subject: [PATCH 6/6] add hosted version of preview app and aws guide --- README.rst | 7 +- docs/conf.py | 31 +++-- docs/get-started.rst | 23 ++-- docs/index.rst | 13 +- docs/preview-app.rst | 4 + docs/settings.rst | 8 +- docs/tutorial.rst | 2 +- docs/tutorials/aws.rst | 218 +++++++++++++++++++++++++++++---- docs/tutorials/categorical.rst | 4 +- docs/tutorials/wsgi.rst | 7 +- 10 files changed, 257 insertions(+), 60 deletions(-) create mode 100644 docs/preview-app.rst diff --git a/README.rst b/README.rst index df440f80..f3a3de29 100644 --- a/README.rst +++ b/README.rst @@ -4,9 +4,10 @@ Terracotta ========== -`Try the demo `__ \| -`Read the docs `__ \| -`Explore the API `__ +`Try the demo `__ \| +`Read the docs `__ \| +`Explore the API `__ \| +`Satlas, powered by Terracotta `__ A light-weight, versatile XYZ tile server, built with Flask and Rasterio :earth_africa: diff --git a/docs/conf.py b/docs/conf.py index 8e3a4cb7..d9891fb7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -49,9 +49,6 @@ 'sphinx_click.ext' ] -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # @@ -76,6 +73,28 @@ # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'friendly' +# -- Dirty HAXX to compile and serve preview app ----------------------------- + +preview_hostname = 'https://2truhxo59g.execute-api.eu-central-1.amazonaws.com/production' + +# Add any paths that contain templates here, relative to this directory. 
+templates_path = ['_templates', '../terracotta/client/templates'] + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static', '../terracotta/client/static'] + +html_additional_pages = { + 'preview-app': 'app.html', +} + +# Inject Jinja variables defined by Flask +html_context = { + 'hostname': preview_hostname, + 'url_for': lambda _, filename: f'_static/{filename}' +} + # -- Extension settings -------------------------------------------------------- intersphinx_mapping = { @@ -116,12 +135,6 @@ def setup(app): app.add_stylesheet('https://fonts.googleapis.com/css?family=Lato|Roboto+Mono') - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - # Custom sidebar templates, must be a dictionary that maps document names # to template names. # diff --git a/docs/get-started.rst b/docs/get-started.rst index 396ac717..61e7a0f2 100644 --- a/docs/get-started.rst +++ b/docs/get-started.rst @@ -50,7 +50,7 @@ Data exploration through Terracotta If you have some raster files lying around (e.g. in GeoTiff format), you can use Terracotta to serve them up. -Assume you are in a folder containing some files named with the pattern +Assume you are in a folder containing some files named with the pattern :file:`S2A__.tif`. You can start a Terracotta server via .. code-block:: bash @@ -67,9 +67,9 @@ Assume you are in a folder containing some files named with the pattern which will serve your data at ``http://localhost:5000``. 
Try the following URLs and see what happens: -- `http://localhost:5000/keys`__ -- `http://localhost:5000/datasets`__ -- `http://localhost:5000/apidoc`__ +- `localhost:5000/keys `__ +- `localhost:5000/datasets `__ +- `localhost:5000/apidoc `__ Because it is cumbersome to explore a Terracotta instance by manually constructing URLs, we have built a tool that lets you inspect it @@ -79,6 +79,9 @@ interactively: $ terracotta connect localhost:5000 +If you did everything correctly, a new window should open in your browser, +showing something :doc:`similar to this `. + Creating a raster database -------------------------- @@ -91,7 +94,7 @@ store: 1. Through the CLI ++++++++++++++++++ -A simple but limited way to build a database is to use +A simple but limited way to build a database is to use :doc:`terracotta ingest `. All you need to do is to point Terracotta to a folder of (cloud-optimized) GeoTiffs: @@ -166,8 +169,10 @@ To explore the server, you can once again use $ terracotta connect localhost:5000 -.. note:: +However, the server spawned by ``terracotta serve`` is intended for +development and data exploration only. For sophisticated production +deployments, :doc:`have a look at our tutorials `. - The server spawned by ``terracotta serve`` is indended for development - and data exploration only. For sophisticated production deployments, - :doc:`have a look at our tutorials `. +If you are unsure which kind of deployment to choose, we recommend +that you try out a :doc:`serverless deployment on AWS λ `, +via the remote SQLite driver. diff --git a/docs/index.rst b/docs/index.rst index d2ce22ad..03923d0b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -91,8 +91,7 @@ Already implemented drivers include: - **MySQL + GeoTiff**: Similar to the SQLite driver, but uses a centralized MySQL database to store metadata. This driver is an excellent candidate for deployments on cloud services, e.g. through - `AWS Aurora - Serverless `__. + `AWS Aurora Serverless `__. 
Web API ------- @@ -103,6 +102,11 @@ Every Terracotta deployment exposes the API it uses as a respectively. This is the best way to find out which API *your* deployment of Terracotta uses. +Why serverless? +--------------- + + + Limitations ----------- @@ -134,10 +138,11 @@ Contents .. toctree:: :maxdepth: 2 - + get-started settings cli api tutorial - issues \ No newline at end of file + issues + preview-app diff --git a/docs/preview-app.rst b/docs/preview-app.rst new file mode 100644 index 00000000..305a148d --- /dev/null +++ b/docs/preview-app.rst @@ -0,0 +1,4 @@ +Try Terracotta +============== + +This file is intentionally left blank diff --git a/docs/settings.rst b/docs/settings.rst index 76cdf52f..9643c1aa 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -10,7 +10,7 @@ settings in several different ways: - Terracotta is fully configurable through environment variables that are prefixed with ``TC_``. E.g., running - + .. code-block:: bash $ export TC_UPSAMPLING_METHOD=cubic @@ -54,11 +54,11 @@ Available runtime settings All runtime settings are contained in the following :class:`~typing.NamedTuple`. -.. seealso:: +.. seealso:: To see the types and default values of the settings, - `have a look at the TerracottaSettings source code <_modules/terracotta/config.html#TerracottaSettings>`_. + `have a look at the TerracottaSettings source code <_modules/terracotta/config.html#TerracottaSettings>`__. .. autoclass:: terracotta.config.TerracottaSettings :members: - :member-order: bysource \ No newline at end of file + :member-order: bysource diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 25013612..d27cc8ea 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -6,7 +6,7 @@ These tutorials are supposed to help you get started with Terracotta; they are n as authoritive references or strict demonstrations of best practices. If you run into problems following any of the tutorials, please let us know by -`opening an issue `_. +`opening an issue `__. .. 
toctree:: :maxdepth: 1 diff --git a/docs/tutorials/aws.rst b/docs/tutorials/aws.rst index f552b0a8..67eadf8f 100644 --- a/docs/tutorials/aws.rst +++ b/docs/tutorials/aws.rst @@ -1,38 +1,208 @@ -A serverless Terracotta deployment on AWS -========================================= +A serverless Terracotta deployment on AWS λ +=========================================== -The easiest way to deploy Terracotta to AWS λ is by using -`Zappa `__. This repository contains -a template with sensible default values for most Zappa settings. +.. warning:: -.. note:: - Note that Zappa works best on Linux. Windows 10 users can use the - `Windows Subsystem for - Linux `__ to - deploy Terracotta. + While it is possible to use Terracotta entirely within AWS' free tier, + using AWS to deploy Terracotta will probably incur some charges to your + account. Make sure to check the pricing policy of all relevant services + for your specific region. Environment setup ----------------- -Create and activate a new virtual environment (here called ``tc-deploy``). -Install all relevant dependencies via ``pip install -r zappa_requirements.txt``. -Install the AWS command line tools via ``pip install awscli``. -Configure access to AWS by running ``aws configure``. Make sure that you have proper access to S3 and AWS λ before continuing. +The easiest way to deploy Terracotta to AWS λ is by using +`Zappa `__. Zappa takes care of packaging +Terracotta and its dependencies, and creates endpoints on AWS λ and API +Gateway for us. + +.. seealso:: + + Zappa works best on Linux. Windows 10 users can use the + :doc:`Windows Subsystem for Linux ` to deploy Terracotta. + +Assuming you already have Terracotta installed, follow these steps to set up +a deployment environment: + +1. Create and activate a new virtual environment (here called ``tc-deploy``), + e.g. via + + .. 
code-block:: bash + + $ pip install virtualenv --user + $ virtualenv ~/envs/tc-deploy --python=python3.6 + $ source ~/envs/tc-deploy/bin/activate + + If you do not have Python 3.6 installed, one way to get it is via the + ``deadsnakes`` PPA (on Ubuntu): + + .. code-block:: bash + + $ sudo add-apt-repository ppa:deadsnakes/ppa + $ sudo apt update + $ sudo apt install python3.6-dev + + Alternatively, you can use ``pyenv`` or ``conda``. + +2. Install all relevant dependencies and Terracotta via + + .. code-block:: bash + + $ pip install -r zappa_requirements.txt + $ pip install -e . + + in the root of the Terracotta repository. + +3. Install the AWS command line tools via + + .. code-block:: bash + + $ pip install awscli + +4. Configure access to AWS by running + + .. code-block:: bash + + $ aws configure + + This requires that you have an account `on AWS `__ and a valid + IAM user with programmatic access to all relevant resources. + +Make sure that you have proper access to S3 and AWS λ before continuing, e.g. by +running + +.. code-block:: bash + + $ aws s3 ls + + +Optional: Setup a MySQL server on RDS +------------------------------------- + +Setting up a dedicated MySQL server for your Terracotta database is slightly +more cumbersome than relying on SQLite, but has some decisive advantages: + +- Removes the overhead of downloading the SQLite database. +- The contents of the database are accessible from the outside, and ingesting + additional data is more straightforward. +- Multiple Terracotta instances can use the same database server. + +To set up a MySQL server on AWS, just follow these steps: + +1. `Head over to RDS `__ and create a new + MySQL instance. You can either use one of the free-tier, dedicated MySQL + servers, or the AWS Aurora MySQL flavor. + + The default settings for RDS are unfortunately far from optimal for + Terracotta. 
You should tweak them by creating a new "parameter group" + and setting -Optional: Setup MySQL server on RDS ------------------------------------ + :: -Populate data storage and create database ------------------------------------------ + wait_timeout = 1 + max_connections = 16000 + + Don't forget to apply the parameter group to your RDS instance. + +2. By default, your Terracotta Lambda function will not have access to the + RDS instance. To allow access, you will have to add it to the same security + group and subnets as your RDS instance. You can achieve this by adding a + section like this one to your ``zappa_settings.toml`` (see below): + + :: + + [development.vpc_config] + SubnetIds = ["subnet-xxxxxxxx","subnet-yyyyyyyy", "subnet-zzzzzzzz"] + SecurityGroupIds = ["sg-xxxxxxxxxxxxxxxxx"] + + You can extract the correct IDs by clicking on your RDS instance. + +3. By adding the Lambda function to a VPC, it loses access to S3. To re-enable + it, `go to the VPC settings `__ and + create an endpoint for the VPC of the Lambda function, pointing to AWS S3 + (e.g. ``com.amazonaws.eu-central-1.s3``). + +You are now ready to continue with the following step! + + +Populate data storage and database +---------------------------------- + +The recommended way to ingest your optimized raster files into the database +is through :doc:`the Terracotta Python API <../api>`. To initialize your +database, just run something like + +.. code-block:: ipython + + >>> import terracotta as tc + + >>> # for sqlite + >>> driver = tc.get_driver('tc.sqlite') + + >>> # for mysql + >>> driver = tc.get_driver('mysql://user:password@hostname/database') + + >>> key_names = ('type', 'date', 'band') + >>> driver.create(key_names) + +You can then ingest your raster files into the database: + +.. code-block:: ipython + + >>> rasters = { + ... ('index', '20180101', 'ndvi'): 'S2_20180101_NDVI.tif', + ... ('reflectance', '20180101', 'B04'): 'S2_20180101_B04.tif', + ... 
} + >>> for keys, raster_file in rasters.items(): + ... driver.insert(keys, raster_file, + ... override_path=f's3://tc-data/rasters/{raster_file}') + +Verify that everything went well by executing + +.. code-block:: ipython + + >>> driver.get_datasets() + { + ('index', '20180101', 'ndvi'): 's3://tc-data/rasters/S2_20180101_NDVI.tif', + ('reflectance', '20180101', 'B04'): 's3://tc-data/rasters/S2_20180101_B04.tif', + } + +Finally, just make sure that your raster files end up in the place where +Terracotta is looking for them (the paths returned by +:meth:`~terracotta.drivers.sqlite.SQLiteDriver.get_datasets`). You can e.g. +use the AWS CLI: + +.. code-block:: bash + + $ aws s3 sync /path/to/rasters s3://tc-data/rasters + $ aws s3 cp /path/to/tc.sqlite s3://tc-data/tc.sqlite # if using sqlite + +To verify whether everything went well, you can start a local Terracotta +server: + +.. code-block:: bash + + $ terracotta serve s3://tc-data/tc.sqlite + $ terracotta connect localhost:5000 -If you haven’t already done so, create the Terracotta database you - want to use, and upload your raster files to S3. Deploy via Zappa ---------------- -Copy or rename ``zappa_settings.toml.in`` to ``zappa_settings.toml`` and insert the correct path to your Terracotta database. -Run ``zappa deploy development`` or ``zappa deploy production``. Congratulations, your Terracotta instance should now be reachable! +The Terracotta repository contains a template with sensible default values for +most Zappa settings: -Verify deployment ------------------ +.. literalinclude:: ../../zappa_settings.toml.in + :caption: zappa_settings.toml.in + +Copy or rename ``zappa_settings.toml.in`` to ``zappa_settings.toml`` and insert +the correct path to your Terracotta database into the environment variables. +To execute the deployment, run + +.. code-block:: bash + + $ source ~/envs/tc-deploy/bin/activate + $ zappa deploy development + +Congratulations, your Terracotta instance should now be reachable! 
You can +verify the deployment via :doc:`terracotta connect <../cli-commands/connect>`. diff --git a/docs/tutorials/categorical.rst b/docs/tutorials/categorical.rst index 724717b6..b4389a86 100644 --- a/docs/tutorials/categorical.rst +++ b/docs/tutorials/categorical.rst @@ -26,7 +26,7 @@ During ingestion sense for your given application. 2. Attach a mapping ``category name -> pixel value`` to the metadata of - your categorical dataset. Using the :doc:`Python API `, you + your categorical dataset. Using the :doc:`Python API <../api>`, you could do it like this: .. code-block:: python @@ -102,4 +102,4 @@ can use the following functionality: Colors can be passed as hex strings (as in this example) or RGBA color tuples. In case you are looking for a nice color scheme for your categorical datasets, `color brewer `__ - features some excellent suggestions. \ No newline at end of file + features some excellent suggestions. diff --git a/docs/tutorials/wsgi.rst b/docs/tutorials/wsgi.rst index 7cd0567d..28483c23 100644 --- a/docs/tutorials/wsgi.rst +++ b/docs/tutorials/wsgi.rst @@ -52,7 +52,8 @@ you should now be able to access the default nginx page via: http://your_server_ip -For further instructions on how to initially set up Nginx check `here`_. +For further instructions on how to initially set up Nginx check +`here `__. Get data and optimize for Terracotta @@ -150,7 +151,7 @@ To check errors in the service and nginx files: $ sudo nginx -t -This guide is adjusted from `here`_. +This guide is adjusted from `here `__. Optional: SSL Encryption @@ -250,5 +251,3 @@ for the first time via ``https://VM_IP`` are expected because we are using a self signed SSL certificate. The traffic is encrypted, the certificate is just not signed by any of the trusted certificate authorities. - -.. _here: https://www.digitalocean.com/community/tutorials/how-to-install-nginx-on-ubuntu-18-04