diff --git a/earthpy/appeears.py b/earthpy/appeears.py
new file mode 100644
index 00000000..f7316843
--- /dev/null
+++ b/earthpy/appeears.py
@@ -0,0 +1,352 @@
+"""
+earthpy.appeears
+================
+
+A module to download data using the AppEEARS API.
+
+"""
+
+import getpass
+import json
+import logging
+import os
+import pathlib
+import re
+import time
+from glob import glob
+
+import keyring
+import requests
+
+
+class AppeearsDownloader(object):
+    """
+    Class to download data using the AppEEARS API
+
+    AppEEARS (Application for Extracting and Exploring Analysis
+    Ready Samples) offers a simple and efficient way to access
+    and transform geospatial data from a variety of federal (US)
+    data archives. This class implements a subset of the API
+    features. Usage requires an Earthdata Login, available
+    from https://urs.earthdata.nasa.gov/. More information
+    about the application is available at
+    https://appeears.earthdatacloud.nasa.gov/.
+
+    Parameters
+    ----------
+    product : str
+        A product code from
+        https://appeears.earthdatacloud.nasa.gov/products
+    layer : str
+        A layer code from
+        https://appeears.earthdatacloud.nasa.gov/products
+    start_date : str
+        Start date for the data subset, as 'MM-DD-YYYY',
+        or 'MM-DD' if recurring
+    end_date : str
+        End date for the data subset, as 'MM-DD-YYYY',
+        or 'MM-DD' if recurring
+    polygon : gpd.GeoDataFrame
+        The spatial boundary to subset
+    recurring : bool, optional
+        Whether the date range recurs each year.
+        Requires year_range.
+    year_range : list of int, optional
+        Year range for recurring dates, as [YYYY, YYYY]
+    download_key : str, optional
+        Label used in data_dir and as the API job label
+    ea_dir : pathlike, optional
+        Replacement directory for ~/earth-analytics
+    use_keyring : bool, optional
+        Whether to store and retrieve credentials with keyring
+
+    Attributes
+    ----------
+    base_url : str
+        The AppEEARS API URL
+    data_dir : pathlike
+        Path to store data in.
+        Default: ~/earth-analytics/appeears-data
+    download_key : str
+        Label used in data_dir and as the API job label
+    auth_header : str
+        Authentication header to use for AppEEARS commands
+    task_id : str
+        Task ID assigned by AppEEARS
+    """
+
+    base_url = "https://appeears.earthdatacloud.nasa.gov/api/"
+
+    def __init__(
+            self,
+            product, layer, start_date, end_date, polygon,
+            recurring=False, year_range=None,
+            download_key="appeears", ea_dir=None,
+            use_keyring=True):
+
+        # Initialize attributes
+        self._product = product
+        self._layer = layer
+        self._start_date = start_date
+        self._end_date = end_date
+        self._recurring = recurring
+        self._year_range = year_range
+        self._polygon = polygon
+        self._use_keyring = use_keyring
+
+        self._auth_header = None
+        self._status = None
+
+        # Reuse a cached task id from disk or the environment
+        self.task_id_path = os.path.join(
+            pathlib.Path.home(), '.appeears_taskid')
+        if os.path.exists(self.task_id_path):
+            with open(self.task_id_path, 'r') as task_id_file:
+                self._task_id = task_id_file.readline()
+        elif 'APPEEARS_TASKID' in os.environ:
+            self._task_id = os.environ['APPEEARS_TASKID']
+            with open(self.task_id_path, 'w') as task_id_file:
+                task_id_file.write(self._task_id)
+        else:
+            self._task_id = None
+
+        # Set up download path
+        self.download_key = download_key
+        if ea_dir is None:
+            ea_dir = os.path.join(pathlib.Path.home(), 'earth-analytics')
+        self.data_dir = os.path.join(ea_dir, download_key)
+
+    def appeears_request(
+            self, endpoint,
+            method='POST', req_json=None, stream=False,
+            **parameters):
+        """
+        Submits a request to the AppEEARS API
+
+        Parameters
+        ----------
+        endpoint : str
+            The API endpoint from
+            https://appeears.earthdatacloud.nasa.gov/api/
+        method : str
+            HTTP method, 'GET' or 'POST'
+        req_json : dictlike, optional
+            JSON to submit with the request (for the task endpoint)
+        stream : bool, optional
+            Whether to stream the response (for file downloads)
+        **parameters : dict, optional
+            Named parameters to format into the endpoint
+
+        Returns
+        -------
+        response : requests.Response
+            The completed API response
+        """
+        logging.info('Submitting {} request...'.format(endpoint))
+
+        kwargs = {
+            'url': self.base_url + endpoint.format(**parameters),
+            'headers': {'Authorization': self.auth_header}
+        }
+        if req_json:
+            logging.debug('Submitting task with JSON\n{}'.format(
+                json.dumps(req_json)))
+            kwargs['json'] = req_json
+
+        # Stream file downloads
+        if stream:
+            kwargs['allow_redirects'] = True
+            kwargs['stream'] = True
+
+        # Submit request
+        response = requests.request(method=method, **kwargs)
+        logging.debug('RESPONSE TEXT: \n{}'.format(response.text))
+        response.raise_for_status()
+
+        logging.info('{} request successfully completed'.format(endpoint))
+
+        return response
+
+    def login(self, service='NASA_EARTHDATA', username_id='NED_USERNAME'):
+        """
+        Logs in to the AppEEARS API.
+
+        Login happens automatically when self.auth_header is
+        requested. Call this function to use a customized
+        service name in the keyring, or set the self._auth_header
+        value manually for other custom situations.
+
+        Parameters
+        ----------
+        service : str, optional
+            The name under which to store the credential in keyring
+        username_id : str, optional
+            The keyring key under which to store the username
+        """
+        username = None
+        password = None
+
+        # Get username and password from keyring
+        if self._use_keyring:
+            try:
+                username = keyring.get_password(service, username_id)
+                password = keyring.get_password(service, username)
+            except Exception:
+                username = None
+                password = None
+
+        # Fall back to the environment if keyring came up empty
+        if username is None or password is None:
+            username = os.environ.get('EARTHDATA_USERNAME')
+            password = os.environ.get('EARTHDATA_PASSWORD')
+
+        # Prompt user if no username or password is stored
+        if username is None or password is None:
+            # Ask for the user's username and password
+            username = input('NASA Earthdata Username: ')
+            password = getpass.getpass('NASA Earthdata Password: ')
+            if self._use_keyring:
+                try:
+                    keyring.set_password(service, username_id, username)
+                    keyring.set_password(service, username, password)
+                except Exception:
+                    pass
+
+        logging.info('Logging into AppEEARS API...')
+
+        # Set up authentication and submit login request
+        login_resp = requests.post(
+            self.base_url + 'login',
+            auth=(username, password))
+        login_resp.raise_for_status()
+
+        self._auth_header = (
+            '{token_type} {token}'.format(**login_resp.json()))
+
+        logging.info('Login successful.')
+
+    @property
+    def auth_header(self):
+        if not self._auth_header:
+            self.login()
+        return self._auth_header
+
+    @property
+    def task_id(self):
+        if not self._task_id:
+            self.submit_task_request()
+        return self._task_id
+
+    @property
+    def task_status(self):
+        if self._status != 'done':
+            self.wait_for_task()
+        return self._status
+
+    def submit_task_request(self):
+        """
+        Submit a task request for the object parameters
+
+        This function is automatically called when self.task_id
+        is requested. Set self._task_id to override.
+        """
+        # Task parameters
+        task = {
+            'task_type': 'area',
+            'task_name': self.download_key,
+            'params': {
+                'dates': [
+                    {
+                        'startDate': self._start_date,
+                        'endDate': self._end_date
+                    }
+                ],
+                'layers': [
+                    {
+                        'product': self._product,
+                        'layer': self._layer
+                    }
+                ],
+                # The geometry must be JSON, not a string
+                "geo": json.loads(
+                    self._polygon.dissolve().envelope.to_json()),
+                "output": {
+                    "format": {"type": "geotiff"},
+                    "projection": "geographic"
+                }
+            }
+        }
+
+        if self._recurring:
+            if self._year_range is None:
+                raise ValueError(
+                    'Must supply year range for recurring dates')
+            task['params']['dates'][0]['recurring'] = True
+            task['params']['dates'][0]['yearRange'] = self._year_range
+
+        # Submit the task request
+        task_response = self.appeears_request('task', req_json=task)
+
+        # Save task ID for later
+        self._task_id = task_response.json()['task_id']
+        with open(self.task_id_path, 'w') as task_id_file:
+            task_id_file.write(self._task_id)
+
+    def wait_for_task(self):
+        """
+        Waits for the AppEEARS service to prepare the data subset
+        """
+        self._status = 'initializing'
+        while self._status != 'done':
+            time.sleep(3)
+            # Wait 20 seconds in between status checks
+            if self._status != 'initializing':
+                time.sleep(20)
+
+            # Check status
+            status_response = self.appeears_request(
+                'status/{task_id}', method='GET', task_id=self.task_id)
+
+            # Update status
+            if 'progress' in status_response.json():
+                self._status = status_response.json()['progress']['summary']
+            elif 'status' in status_response.json():
+                self._status = status_response.json()['status']
+
+            logging.info(self._status)
+        logging.info('Task completed - ready for download.')
+
+    def download_files(self, cache=True):
+        """
+        Streams all prepared file downloads
+
+        Parameters
+        ----------
+        cache : bool
+            Use the cache to avoid repeat downloads
+        """
+        status = self.task_status
+        logging.info('Current task status: {}'.format(status))
+
+        # Get file download information
+        bundle_response = self.appeears_request(
+            'bundle/{task_id}',
+            method='GET',
+            task_id=self.task_id)
+
+        files = bundle_response.json()['files']
+        logging.info('{} files available for download'.format(len(files)))
+
+        # Download files
+        for file_info in files:
+            # Get a stream to the bundle file
+            response = self.appeears_request(
+                'bundle/{task_id}/{file_id}',
+                method='GET', task_id=self.task_id, stream=True,
+                file_id=file_info['file_id'])
+
+            # Create a destination directory to store the file in
+            filepath = os.path.join(self.data_dir, file_info['file_name'])
+            if not os.path.exists(os.path.dirname(filepath)):
+                os.makedirs(os.path.dirname(filepath))
+
+            # Write the file to the destination directory
+            if os.path.exists(filepath) and cache:
+                logging.info(
+                    'File at {} already exists. Skipping...'
+                    .format(filepath))
+            else:
+                logging.info('Downloading file {}'.format(filepath))
+                with open(filepath, 'wb') as f:
+                    for data in response.iter_content(chunk_size=8192):
+                        f.write(data)
+
+        # Remove the task id file when the download is complete
+        os.remove(self.task_id_path)
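Usage note (reviewer sketch, not part of the patch): the tests later in this diff exercise the class end to end. A minimal sketch of the intended workflow, assuming an existing polygon GeoDataFrame and valid Earthdata credentials ('boundary.shp' is a hypothetical file):

    import geopandas as gpd
    import earthpy.appeears as eaapp

    # Any polygon GeoDataFrame works; the task subsets to its envelope
    gdf = gpd.read_file('boundary.shp')  # hypothetical boundary file

    # Request a 16-day MODIS NDVI subset for a fixed date range
    downloader = eaapp.AppeearsDownloader(
        product='MOD13Q1.061',
        layer='_250m_16_days_NDVI',
        start_date='01-01-2021',
        end_date='01-20-2021',
        polygon=gdf)

    # Logs in, submits the task, waits for it, and streams the files
    downloader.download_files(cache=True)

On first use, login() prompts for credentials and, when keyring is enabled, caches them under the NASA_EARTHDATA service.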
diff --git a/earthpy/earthexplorer.py b/earthpy/earthexplorer.py
new file mode 100644
index 00000000..64f3b2a1
--- /dev/null
+++ b/earthpy/earthexplorer.py
@@ -0,0 +1,299 @@
+import json
+import os
+import tarfile
+import time
+import zipfile
+from getpass import getpass
+
+import requests
+
+from .io import HOME, DATA_NAME
+
+
+class BBox:
+    """
+    Coordinates of a bounding box for EarthExplorerDownloader
+
+    Parameters
+    ----------
+    llx, lly, urx, ury : float
+        The lower left (ll) and upper right (ur) x and y coordinates
+
+    Attributes
+    ----------
+    llx, lly, urx, ury : float
+        The lower left (ll) and upper right (ur) x and y coordinates
+    spatial_filter : dict, JSON-like
+        Prepared JSON for the bounding box for
+        EarthExplorerDownloader
+    """
+
+    def __init__(self, llx, lly, urx, ury):
+        self.llx, self.lly, self.urx, self.ury = llx, lly, urx, ury
+
+    @property
+    def spatial_filter(self):
+        return {
+            'filterType': "mbr",
+            'lowerLeft': {'latitude': self.lly, 'longitude': self.llx},
+            'upperRight': {'latitude': self.ury, 'longitude': self.urx}}
+
+
+class EarthExplorerDownloader:
+    """
+    Download data using the USGS M2M API (EarthExplorer)
+
+    Parameters
+    ----------
+    dataset : str
+        The name of the dataset to download
+    label : str
+        M2M identifier for the download
+    bbox : earthpy.earthexplorer.BBox
+        Spatial extent of the download
+    start : str
+        Start date of the download in 'YYYY-MM-DD' format
+    end : str
+        End date of the download in 'YYYY-MM-DD' format
+    file_type : str, optional
+        Either 'zip' or 'tar'; default 'zip'
+    store_credential : bool, optional
+        Whether or not to store the EarthExplorer username and
+        password in the user's home folder.
+
+    Attributes
+    ----------
+    base_url : urllike
+        The base URL for the API
+    dld_file_tmpl : str
+        Format string for the names of downloaded files
+    api_key : str
+        The API key for the session
+    ext : str
+        Extension corresponding to the file_type parameter
+    temporal_filter : dict
+        Start and end dates prepared for JSON
+    acquisition_filter : dict
+        Same as temporal_filter
+    data_dir : pathlike
+        Directory to store downloaded data
+    path_tmpl : pathlike
+        Format string for downloaded file paths
+    """
+
+    base_url = "https://m2m.cr.usgs.gov/api/api/json/stable/{endpoint}"
+    dld_file_tmpl = '{display_id}.{ext}'
+
+    def __init__(self, dataset, label, bbox, start, end,
+                 file_type='zip', store_credential=False):
+        self.api_key = None
+        self.ext = file_type
+        self.store_credential = store_credential
+        self.login()
+
+        self.dataset, self.label = dataset, label
+        self.bbox, self.start, self.end = bbox, start, end
+
+        self.temporal_filter = {'start': start, 'end': end}
+        self.acquisition_filter = self.temporal_filter
+
+        self.data_dir = os.path.join(HOME, DATA_NAME, self.label)
+        self.path_tmpl = os.path.join(self.data_dir, self.dld_file_tmpl)
+        if not os.path.exists(self.data_dir):
+            os.makedirs(self.data_dir)
+
+        self._dataset_alias = None
+
+    def get_ee_login_info(self, info_type):
+        """
+        Collect and store API login info
+
+        Parameters
+        ----------
+        info_type : str
+            'username' or 'password'
+
+        Returns
+        -------
+        info : str
+            User input
+        """
+        info_path = os.path.join(HOME, '.ee_{}'.format(info_type))
+        info = None
+        if os.path.exists(info_path) and self.store_credential:
+            with open(info_path, 'r') as info_file:
+                return info_file.read()
+        if info_type == 'username':
+            info = input('Enter EarthExplorer {}: '.format(info_type))
+        if info_type == 'password':
+            info = getpass('Enter EarthExplorer {}: '.format(info_type))
+        if self.store_credential:
+            with open(info_path, 'w') as info_file:
+                info_file.write(info)
+        return info
+
+    def login(self):
+        """
+        Log in to the M2M API, prompting for credentials if needed
+        """
+        if self.api_key is None:
+            login_payload = {
+                'username': self.get_ee_login_info('username'),
+                'password': self.get_ee_login_info('password')}
+            self.api_key = self.post("login", login_payload)
+            print('Login Successful.')
+
+    @property
+    def headers(self):
+        if self.api_key is None:
+            return None
+        return {'X-Auth-Token': self.api_key}
+
+    def logout(self):
+        self.post("logout", None)
+        print("Logged Out\n\n")
+
+    def post(self, endpoint, data):
+        # Send POST requests
+        url = self.base_url.format(endpoint=endpoint)
+        response = requests.post(url, json.dumps(data), headers=self.headers)
+
+        # Raise any HTTP Errors
+        response.raise_for_status()
+
+        # Return data
+        return response.json()['data']
+
+    @property
+    def dataset_alias(self):
+        if self._dataset_alias is None:
+            print("Searching datasets...")
+            params = {
+                'datasetName': self.dataset,
+                'spatialFilter': self.bbox.spatial_filter,
+                'temporalFilter': self.temporal_filter}
+            datasets = self.post("dataset-search", params)
+
+            # Get a single dataset alias
+            if len(datasets) > 1:
+                print(datasets)
+                raise ValueError('Multiple datasets found - refine search.')
+            self._dataset_alias = datasets[0]['datasetAlias']
+
+            print('Using dataset alias: {}'.format(self._dataset_alias))
+        return self._dataset_alias
+
+    def find_scene_ids(self):
+        params = {
+            'datasetName': self.dataset_alias,
+            'startingNumber': 1,
+            'sceneFilter': {
+                'spatialFilter': self.bbox.spatial_filter,
+                'acquisitionFilter': self.acquisition_filter}}
+
+        print("Searching scenes...")
+        scenes = self.post("scene-search", params)
+        print('Found {} scenes'.format(scenes['recordsReturned']))
+        return scenes
+
+    def find_available_product_info(self):
+        scenes = self.find_scene_ids()
+        params = {
+            'datasetName': self.dataset_alias,
+            'entityIds': [scene['entityId'] for scene in scenes['results']]}
+        products = self.post("download-options", params)
+
+        # Aggregate a list of available products
+        product_info = []
+        for product in products:
+            # Make sure the product is available for this scene
+            proxied = product.get('proxied', False)
+            if product['available'] or proxied:
+                product_info.append({
+                    'entityId': product['entityId'],
+                    'productId': product['id']})
+        if not product_info:
+            raise ValueError('No available products.')
+        print('{} products found.'.format(len(product_info)))
+        return product_info
+
+    def submit_download_request(self):
+        product_info = self.find_available_product_info()
+        # Did we find products?
+        if product_info:
+            # Request downloads
+            params = {
+                'downloads': product_info,
+                'label': self.label}
+            self.post("download-request", params)
+            print('Downloads staging...')
+        else:
+            raise ValueError(
+                'No products found with the specified boundaries.')
+
+    def check_download_status(self):
+        params = {'label': self.label}
+        downloads = self.post("download-retrieve", params)
+        return downloads
+
+    def wait_for_available_downloads(self, timeout=None):
+        keep_waiting = True
+        while keep_waiting:
+            downloads = self.check_download_status()
+            n_queued = downloads['queueSize']
+            keep_waiting = n_queued > 0
+            if keep_waiting:
+                print("\n", n_queued,
+                      "downloads queued but not yet available. "
+                      "Waiting for 30 seconds.\n")
+                time.sleep(30)
+
+                if timeout is not None:
+                    timeout -= 30
+                    if timeout < 0:
+                        break
+
+        return downloads
+
+    def download(self, wait=True, timeout=None, override=True):
+        # Check download status
+        if wait:
+            downloads = self.wait_for_available_downloads(timeout=timeout)
+        else:
+            downloads = self.check_download_status()
+
+        available_or_proxied = (
+            downloads['available']
+            + [dld for dld
+               in downloads['requested'] if dld['statusCode'] == 'P'])
+        if not available_or_proxied:
+            raise ValueError('No available downloads.')
+
+        # Download available downloads
+        for download in available_or_proxied:
+            # Download and save compressed file
+            dld_path = self.path_tmpl.format(
+                display_id=download['displayId'], ext=self.ext)
+            print(dld_path)
+            # Skip existing downloads unless override is set
+            if override or (not os.path.exists(dld_path)):
+                print('Saving download: {}'.format(download['displayId']))
+                with open(dld_path, 'wb') as dld_file:
+                    response = requests.get(download['url'])
+                    dld_file.write(response.content)
+
+            # Remove download from M2M system
+            params = {'downloadId': download['downloadId']}
+            self.post('download-remove', params)
+
+            self.uncompress(dld_path)
+
+    def uncompress(self, download_path):
+        # Extract compressed files
+        if self.ext == 'tar':
+            with tarfile.open(download_path, 'r') as dld_tarfile:
+                dld_tarfile.extractall(self.data_dir)
+        if self.ext == 'zip':
+            with zipfile.ZipFile(download_path, 'r') as dld_zipfile:
+                dld_zipfile.extractall(self.data_dir)
\ No newline at end of file
diff --git a/earthpy/tests/test_appeears.py b/earthpy/tests/test_appeears.py
new file mode 100644
index 00000000..581edb28
--- /dev/null
+++ b/earthpy/tests/test_appeears.py
@@ -0,0 +1,41 @@
+"""
+Tests for the appeears module
+"""
+
+import os
+import logging
+import pathlib
+
+import geopandas as gpd
+import earthpy.appeears as eaapp
+
+
+def test_download_data():
+    logging.basicConfig(level=logging.DEBUG)
+
+    quotes_url = (
+        "https://opendata.arcgis.com/datasets/955e7a0f5"
+        "2474b60a9866950daf10acb_0.zip"
+    )
+    gdf = gpd.read_file(quotes_url)
+    downloader = eaapp.AppeearsDownloader(
+        'MOD13Q1.061', '_250m_16_days_NDVI', '01-01-2021', '01-20-2021', gdf)
+    downloader.download_files()
+
+
+def test_download_recurring_data():
+    logging.basicConfig(level=logging.DEBUG)
+
+    quotes_url = (
+        "https://opendata.arcgis.com/datasets/955e7a0f5"
+        "2474b60a9866950daf10acb_0.zip"
+    )
+    gdf = gpd.read_file(quotes_url)
+    downloader = eaapp.AppeearsDownloader(
+        product='MOD13Q1.061',
+        layer='_250m_16_days_NDVI',
+        start_date='01-01',
+        end_date='01-20',
+        recurring=True,
+        year_range=[2021, 2022],
+        polygon=gdf)
+    downloader.download_files()
\ No newline at end of file
diff --git a/setup.py b/setup.py
index cdb7a2a3..4cce930d 100644
--- a/setup.py
+++ b/setup.py
@@ -9,8 +9,8 @@
     "Earth Lab and was originally designed to support the earth "
     "analytics education program."
 )
-MAINTAINER = "Leah Wasser"
-MAINTAINER_EMAIL = "leah.wasser@colorado.edu"
+MAINTAINER = "Elsa Culler"
+MAINTAINER_EMAIL = "elsa.culler@colorado.edu"
 
 
 # read the contents of your README file
@@ -27,7 +27,7 @@
     description=DESCRIPTION,
     long_description=LONG_DESCRIPTION,
     long_description_content_type="text/markdown",
-    version="0.9.4",
+    version="0.10.0",
     packages=["earthpy"],
     install_requires=[
        "geopandas",
@@ -36,6 +36,7 @@
        "rasterio",
        "scikit-image",
        "requests",
+       "keyring",
     ],
     zip_safe=False,  # the package can run out of an .egg file
     classifiers=[
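Testing note: both test functions hit the live AppEEARS API and will block on an interactive credential prompt unless credentials are seeded first. Based on the lookup order in AppeearsDownloader.login(), either of the following works ahead of a test run (placeholder values, not part of the patch):

    import keyring

    # Option 1: pre-seed the keyring the way login() expects it
    # (username stored under NED_USERNAME, password under the username)
    keyring.set_password('NASA_EARTHDATA', 'NED_USERNAME', 'my_username')
    keyring.set_password('NASA_EARTHDATA', 'my_username', 'my_password')

    # Option 2: environment variables, used as a fallback
    # export EARTHDATA_USERNAME=my_username
    # export EARTHDATA_PASSWORD=my_password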