From d69270841c3718a35fd9336ee9ad9f38cc5cfc8d Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Wed, 7 Aug 2019 16:28:06 +0200 Subject: [PATCH 1/8] [asf_template.py] initial commit --- sentinel_api/asf_template.py | 596 +++++++++++++++++++++++++++++++++++ 1 file changed, 596 insertions(+) create mode 100755 sentinel_api/asf_template.py diff --git a/sentinel_api/asf_template.py b/sentinel_api/asf_template.py new file mode 100755 index 0000000..1b12b89 --- /dev/null +++ b/sentinel_api/asf_template.py @@ -0,0 +1,596 @@ +#!/usr/bin/python + +# Usage: +# +# In a terminal/command line, cd to the directory where this file lives. Then... +# +# With embedded urls: ( download the hardcoded list of files in the 'files =' block below) +# +# python ./download-all-2019-08-07_12-39-51.py +# +# Download all files in a Metalink/CSV: (downloaded from ASF Vertex) +# +# python ./download-all-2019-08-07_12-39-51.py /path/to/downloads.metalink localmetalink.metalink localcsv.csv +# +# Compatibility: python >= 2.6.5, 2.7.5, 3.0 +# +# If downloading from a trusted source with invalid SSL Certs, use --insecure to ignore +# +# For more information on bulk downloads, navigate to: +# https://www.asf.alaska.edu/data-tools/bulk-download/ +# +# +# +# This script was generated by the Alaska Satellite Facility's bulk download service. +# For more information on the service, navigate to: +# http://bulk-download.asf.alaska.edu/help +# + +import sys, csv +import os, os.path +import tempfile, shutil +import re + +import base64 +import time +import getpass +import ssl +import signal + +import xml.etree.ElementTree as ET + +############# +# This next block is a bunch of Python 2/3 compatability + +try: + # Python 2.x Libs + from urllib2 import build_opener, install_opener, Request, urlopen, HTTPError + from urllib2 import URLError, HTTPSHandler, HTTPHandler, HTTPCookieProcessor + + from cookielib import MozillaCookieJar + from StringIO import StringIO + +except ImportError as e: + + # Python 3.x Libs + from urllib.request import build_opener, install_opener, Request, urlopen + from urllib.request import HTTPHandler, HTTPSHandler, HTTPCookieProcessor + from urllib.error import HTTPError, URLError + + from http.cookiejar import MozillaCookieJar + from io import StringIO + +### +# Global variables intended for cross-thread modification +abort = False + +### +# A routine that handles trapped signals +def signal_handler(sig, frame): + global abort + sys.stderr.output("\n > Caught Signal. Exiting!\n") + abort = True # necessary to cause the program to stop + raise SystemExit # this will only abort the thread that the ctrl+c was caught in + +class bulk_downloader: + def __init__(self): + # List of files to download + self.files = [ "https://datapool.asf.alaska.edu/GRD_HD/SB/S1B_IW_GRDH_1SDV_20190807T045445_20190807T045510_017475_020DCD_A6EF.zip", + "https://datapool.asf.alaska.edu/GRD_HD/SB/S1B_IW_GRDH_1SDV_20190807T043950_20190807T044019_017475_020DCA_CE96.zip" ] + + # Local stash of cookies so we don't always have to ask + self.cookie_jar_path = os.path.join( os.path.expanduser('~'), ".bulk_download_cookiejar.txt") + self.cookie_jar = None + + self.asf_urs4 = { 'url': 'https://urs.earthdata.nasa.gov/oauth/authorize', + 'client': 'BO_n7nTIlMljdvU6kRRB3g', + 'redir': 'https://vertex-retired.daac.asf.alaska.edu/services/urs4_token_request'} + + # Make sure we can write it our current directory + if os.access(os.getcwd(), os.W_OK) is False: + print ("WARNING: Cannot write to current path! 
Check permissions for {0}".format(os.getcwd())) + exit(-1) + + # For SSL + self.context = {} + + # Check if user handed in a Metalink or CSV: + if len(sys.argv) > 0: + download_files = [] + input_files = [] + for arg in sys.argv[1:]: + if arg == '--insecure': + try: + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + self.context['context'] = ctx + except AttributeError: + # Python 2.6 won't complain about SSL Validation + pass + + elif arg.endswith('.metalink') or arg.endswith('.csv'): + if os.path.isfile( arg ): + input_files.append( arg ) + if arg.endswith('.metalink'): + new_files = self.process_metalink(arg) + else: + new_files = self.process_csv(arg) + if new_files is not None: + for file_url in (new_files): + download_files.append( file_url ) + else: + print (" > I cannot find the input file you specified: {0}".format(arg)) + else: + print (" > Command line argument '{0}' makes no sense, ignoring.".format(arg)) + + if len(input_files) > 0: + if len(download_files) > 0: + print (" > Processing {0} downloads from {1} input files. ".format(len(download_files), len(input_files))) + self.files = download_files + else: + print (" > I see you asked me to download files from {0} input files, but they had no downloads!".format(len(input_files))) + print (" > I'm super confused and exiting.") + exit(-1) + + # Make sure cookie_jar is good to go! + self.get_cookie() + + # summary + self.total_bytes = 0 + self.total_time = 0 + self.cnt = 0 + self.success = [] + self.failed = [] + self.skipped = [] + + + # Get and validate a cookie + def get_cookie(self): + if os.path.isfile(self.cookie_jar_path): + self.cookie_jar = MozillaCookieJar() + self.cookie_jar.load(self.cookie_jar_path) + + # make sure cookie is still valid + if self.check_cookie(): + print(" > Re-using previous cookie jar.") + return True + else: + print(" > Could not validate old cookie Jar") + + # We don't have a valid cookie, prompt user or creds + print ("No existing URS cookie found, please enter Earthdata username & password:") + print ("(Credentials will not be stored, saved or logged anywhere)") + + # Keep trying 'till user gets the right U:P + while self.check_cookie() is False: + self.get_new_cookie() + + return True + + # Validate cookie before we begin + def check_cookie(self): + + if self.cookie_jar is None: + print (" > Cookiejar is bunk: {0}".format(self.cookie_jar)) + return False + + # File we know is valid, used to validate cookie + file_check = 'https://urs.earthdata.nasa.gov/profile' + + # Apply custom Redirect Hanlder + opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) + install_opener(opener) + + # Attempt a HEAD request + request = Request(file_check) + request.get_method = lambda : 'HEAD' + try: + print (" > attempting to download {0}".format(file_check)) + response = urlopen(request, timeout=30) + resp_code = response.getcode() + # Make sure we're logged in + if not self.check_cookie_is_logged_in(self.cookie_jar): + return False + + # Save cookiejar + self.cookie_jar.save(self.cookie_jar_path) + + except HTTPError: + # If we ge this error, again, it likely means the user has not agreed to current EULA + print ("\nIMPORTANT: ") + print ("Your user appears to lack permissions to download data from the ASF Datapool.") + print ("\n\nNew users: you must first log into Vertex and accept the EULA. 
In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") + exit(-1) + + # This return codes indicate the USER has not been approved to download the data + if resp_code in (300, 301, 302, 303): + try: + redir_url = response.info().getheader('Location') + except AttributeError: + redir_url = response.getheader('Location') + + #Funky Test env: + if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']): + print ("Cough, cough. It's dusty in this test env!") + return True + + print ("Redirect ({0}) occured, invalid cookie value!".format(resp_code)) + return False + + # These are successes! + if resp_code in (200, 307): + return True + + return False + + def get_new_cookie(self): + # Start by prompting user to input their credentials + + # Another Python2/3 workaround + try: + new_username = raw_input("Username: ") + except NameError: + new_username = input("Username: ") + new_password = getpass.getpass(prompt="Password (will not be displayed): ") + + # Build URS4 Cookie request + auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4['client'] + '&redirect_uri=' + self.asf_urs4['redir'] + '&response_type=code&state=' + + try: + #python2 + user_pass = base64.b64encode (bytes(new_username+":"+new_password)) + except TypeError: + #python3 + user_pass = base64.b64encode (bytes(new_username+":"+new_password, "utf-8")) + user_pass = user_pass.decode("utf-8") + + # Authenticate against URS, grab all the cookies + self.cookie_jar = MozillaCookieJar() + opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) + request = Request(auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)}) + + # Watch out cookie rejection! + try: + response = opener.open(request) + except HTTPError as e: + if e.code == 401: + print (" > Username and Password combo was not successful. Please try again.") + return False + else: + # If an error happens here, the user most likely has not confirmed EULA. + print ("\nIMPORTANT: There was an error obtaining a download cookie!") + print ("Your user appears to lack permission to download data from the ASF Datapool.") + print ("\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") + exit(-1) + except URLError as e: + print ("\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. ") + print ("Try cookie generation later.") + exit(-1) + + # Did we get a cookie? + if self.check_cookie_is_logged_in(self.cookie_jar): + #COOKIE SUCCESS! + self.cookie_jar.save(self.cookie_jar_path) + return True + + # if we aren't successful generating the cookie, nothing will work. Stop here! + print ("WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again.") + print ("Response was {0}.".format(response.getcode())) + print ("\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") + exit(-1) + + # make sure we're logged into URS + def check_cookie_is_logged_in(self, cj): + for cookie in cj: + if cookie.name == 'urs_user_already_logged': + # Only get this cookie if we logged in successfully! 
+ return True + + return False + + + # Download the file + def download_file_with_cookiejar(self, url, file_count, total, recursion=False): + # see if we've already download this file and if it is that it is the correct size + download_file = os.path.basename(url).split('?')[0] + if os.path.isfile(download_file): + try: + request = Request(url) + request.get_method = lambda : 'HEAD' + response = urlopen(request, timeout=30) + remote_size = self.get_total_size(response) + # Check that we were able to derive a size. + if remote_size: + local_size = os.path.getsize(download_file) + if remote_size < (local_size+(local_size*.01)) and remote_size > (local_size-(local_size*.01)): + print (" > Download file {0} exists! \n > Skipping download of {1}. ".format(download_file, url)) + return None,None + #partial file size wasn't full file size, lets blow away the chunk and start again + print (" > Found {0} but it wasn't fully downloaded. Removing file and downloading again.".format(download_file)) + os.remove(download_file) + + except ssl.CertificateError as e: + print (" > ERROR: {0}".format(e)) + print (" > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag") + return False,None + + except HTTPError as e: + if e.code == 401: + print (" > IMPORTANT: Your user may not have permission to download this type of data!") + else: + print (" > Unknown Error, Could not get file HEAD: {0}".format(e)) + + except URLError as e: + print ("URL Error (from HEAD): {0}, {1}".format( e.reason, url)) + if "ssl.c" in "{0}".format(e.reason): + print ("IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error.") + return False,None + + # attempt https connection + try: + request = Request(url) + response = urlopen(request, timeout=30) + + # Watch for redirect + if response.geturl() != url: + + # See if we were redirect BACK to URS for re-auth. + if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl(): + + if recursion: + print (" > Entering seemingly endless auth loop. Aborting. ") + return False, None + + # make this easier. If there is no app_type=401, add it + new_auth_url = response.geturl() + if "app_type" not in new_auth_url: + new_auth_url += "&app_type=401" + + print (" > While attempting to download {0}....".format(url)) + print (" > Need to obtain new cookie from {0}".format(new_auth_url)) + old_cookies = [cookie.name for cookie in self.cookie_jar] + opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) + request = Request(new_auth_url) + try: + response = opener.open(request) + for cookie in self.cookie_jar: + if cookie.name not in old_cookies: + print (" > Saved new cookie: {0}".format(cookie.name)) + + # A little hack to save session cookies + if cookie.discard: + cookie.expires = int(time.time()) + 60*60*24*30 + print (" > Saving session Cookie that should have been discarded! ") + + self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True) + except HTTPError as e: + print ("HTTP Error: {0}, {1}".format( e.code, url)) + return False,None + + # Okay, now we have more cookies! Lets try again, recursively! 
+ print (" > Attempting download again with new cookies!") + return self.download_file_with_cookiejar(url, file_count, total, recursion=True) + + print (" > 'Temporary' Redirect download @ Remote archive:\n > {0}".format(response.geturl())) + + # seems to be working + print ("({0}/{1}) Downloading {2}".format(file_count, total, url)) + + # Open our local file for writing and build status bar + tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') + self.chunk_read(response, tf, report_hook=self.chunk_report) + + # Reset download status + sys.stdout.write('\n') + + tempfile_name = tf.name + tf.close() + + #handle errors + except HTTPError as e: + print ("HTTP Error: {0}, {1}".format( e.code, url)) + + if e.code == 401: + print (" > IMPORTANT: Your user does not have permission to download this type of data!") + + if e.code == 403: + print (" > Got a 403 Error trying to download this file. ") + print (" > You MAY need to log in this app and agree to a EULA. ") + + return False,None + + except URLError as e: + print ("URL Error (from GET): {0}, {1}, {2}".format(e, e.reason, url)) + if "ssl.c" in "{0}".format(e.reason): + print ("IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error.") + return False,None + + except ssl.CertificateError as e: + print (" > ERROR: {0}".format(e)) + print (" > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag") + return False,None + + # Return the file size + shutil.copy(tempfile_name, download_file) + os.remove(tempfile_name) + file_size = self.get_total_size(response) + actual_size = os.path.getsize(download_file) + if file_size is None: + # We were unable to calculate file size. + file_size = actual_size + return actual_size,file_size + + def get_redirect_url_from_error(self, error): + find_redirect = re.compile(r"id=\"redir_link\"\s+href=\"(\S+)\"") + print ("error file was: {}".format(error)) + redirect_url = find_redirect.search(error) + if redirect_url: + print("Found: {0}".format(redirect_url.group(0))) + return (redirect_url.group(0)) + + return None + + + # chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook + def chunk_report(self, bytes_so_far, file_size): + if file_size is not None: + percent = float(bytes_so_far) / file_size + percent = round(percent*100, 2) + sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" % + (bytes_so_far, file_size, percent)) + else: + # We couldn't figure out the size. + sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far)) + + # chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook + def chunk_read(self, response, local_file, chunk_size=8192, report_hook=None): + file_size = self.get_total_size(response) + bytes_so_far = 0 + + while 1: + try: + chunk = response.read(chunk_size) + except: + sys.stdout.write("\n > There was an error reading data. 
\n") + break + + try: + local_file.write(chunk) + except TypeError: + local_file.write(chunk.decode(local_file.encoding)) + bytes_so_far += len(chunk) + + if not chunk: + break + + if report_hook: + report_hook(bytes_so_far, file_size) + + return bytes_so_far + + def get_total_size(self, response): + try: + file_size = response.info().getheader('Content-Length').strip() + except AttributeError: + try: + file_size = response.getheader('Content-Length').strip() + except AttributeError: + print ("> Problem getting size") + return None + + return int(file_size) + + + # Get download urls from a metalink file + def process_metalink(self, ml_file): + print ("Processing metalink file: {0}".format(ml_file)) + with open(ml_file, 'r') as ml: + xml = ml.read() + + # Hack to remove annoying namespace + it = ET.iterparse(StringIO(xml)) + for _, el in it: + if '}' in el.tag: + el.tag = el.tag.split('}', 1)[1] # strip all namespaces + root = it.root + + dl_urls = [] + ml_files = root.find('files') + for dl in ml_files: + dl_urls.append(dl.find('resources').find('url').text) + + if len(dl_urls) > 0: + return dl_urls + else: + return None + + # Get download urls from a csv file + def process_csv(self, csv_file): + print ("Processing csv file: {0}".format(csv_file)) + + dl_urls = [] + with open(csv_file, 'r') as csvf: + try: + csvr = csv.DictReader(csvf) + for row in csvr: + dl_urls.append(row['URL']) + except csv.Error as e: + print ("WARNING: Could not parse file %s, line %d: %s. Skipping." % (csv_file, csvr.line_num, e)) + return None + except KeyError as e: + print ("WARNING: Could not find URL column in file %s. Skipping." % (csv_file)) + + if len(dl_urls) > 0: + return dl_urls + else: + return None + + # Download all the files in the list + def download_files(self): + for file_name in self.files: + + # make sure we haven't ctrl+c'd or some other abort trap + if abort == True: + raise SystemExit + + # download counter + self.cnt += 1 + + # set a timer + start = time.time() + + # run download + size,total_size = self.download_file_with_cookiejar(file_name, self.cnt, len(self.files)) + + # calculte rate + end = time.time() + + # stats: + if size is None: + self.skipped.append(file_name) + # Check to see that the download didn't error and is the correct size + elif size is not False and (total_size < (size+(size*.01)) and total_size > (size-(size*.01))): + # Download was good! 
+ elapsed = end - start + elapsed = 1.0 if elapsed < 1 else elapsed + rate = (size/1024**2)/elapsed + + print ("Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec".format(size, elapsed, rate)) + + # add up metrics + self.total_bytes += size + self.total_time += elapsed + self.success.append( {'file':file_name, 'size':size } ) + + else: + print ("There was a problem downloading {0}".format(file_name)) + self.failed.append(file_name) + + def print_summary(self): + # Print summary: + print ("\n\nDownload Summary ") + print ("--------------------------------------------------------------------------------") + print (" Successes: {0} files, {1} bytes ".format(len(self.success), self.total_bytes)) + for success_file in self.success: + print (" - {0} {1:.2f}MB".format(success_file['file'],(success_file['size']/1024.0**2))) + if len(self.failed) > 0: + print (" Failures: {0} files".format(len(self.failed))) + for failed_file in self.failed: + print (" - {0}".format(failed_file)) + if len(self.skipped) > 0: + print (" Skipped: {0} files".format(len(self.skipped))) + for skipped_file in self.skipped: + print (" - {0}".format(skipped_file)) + if len(self.success) > 0: + print (" Average Rate: {0:.2f}MB/sec".format( (self.total_bytes/1024.0**2)/self.total_time)) + print ("--------------------------------------------------------------------------------") + + +if __name__ == "__main__": + # Setup a signal trap for SIGINT (Ctrl+C) + signal.signal(signal.SIGINT, signal_handler) + + downloader = bulk_downloader() + downloader.download_files() + downloader.print_summary() From db8b615b41c4b05b0b0668e31f60a921f8b04ff8 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Wed, 7 Aug 2019 16:30:18 +0200 Subject: [PATCH 2/8] [asf_template.py] general code appearance improvements --- sentinel_api/asf_template.py | 888 ++++++++++++++++++----------------- 1 file changed, 451 insertions(+), 437 deletions(-) diff --git a/sentinel_api/asf_template.py b/sentinel_api/asf_template.py index 1b12b89..b00543c 100755 --- a/sentinel_api/asf_template.py +++ b/sentinel_api/asf_template.py @@ -26,9 +26,12 @@ # http://bulk-download.asf.alaska.edu/help # -import sys, csv -import os, os.path -import tempfile, shutil +import os +import os.path +import csv +import sys +import tempfile +import shutil import re import base64 @@ -43,57 +46,60 @@ # This next block is a bunch of Python 2/3 compatability try: - # Python 2.x Libs - from urllib2 import build_opener, install_opener, Request, urlopen, HTTPError - from urllib2 import URLError, HTTPSHandler, HTTPHandler, HTTPCookieProcessor - - from cookielib import MozillaCookieJar - from StringIO import StringIO + # Python 2.x Libs + from urllib2 import build_opener, install_opener, Request, urlopen, HTTPError + from urllib2 import URLError, HTTPSHandler, HTTPHandler, HTTPCookieProcessor + + from cookielib import MozillaCookieJar + from StringIO import StringIO except ImportError as e: - - # Python 3.x Libs - from urllib.request import build_opener, install_opener, Request, urlopen - from urllib.request import HTTPHandler, HTTPSHandler, HTTPCookieProcessor - from urllib.error import HTTPError, URLError - - from http.cookiejar import MozillaCookieJar - from io import StringIO + + # Python 3.x Libs + from urllib.request import build_opener, install_opener, Request, urlopen + from urllib.request import HTTPHandler, HTTPSHandler, HTTPCookieProcessor + from urllib.error import HTTPError, URLError + + from http.cookiejar import MozillaCookieJar + from io import StringIO ### # Global 
variables intended for cross-thread modification abort = False + ### # A routine that handles trapped signals def signal_handler(sig, frame): global abort sys.stderr.output("\n > Caught Signal. Exiting!\n") - abort = True # necessary to cause the program to stop + abort = True # necessary to cause the program to stop raise SystemExit # this will only abort the thread that the ctrl+c was caught in + class bulk_downloader: def __init__(self): # List of files to download - self.files = [ "https://datapool.asf.alaska.edu/GRD_HD/SB/S1B_IW_GRDH_1SDV_20190807T045445_20190807T045510_017475_020DCD_A6EF.zip", - "https://datapool.asf.alaska.edu/GRD_HD/SB/S1B_IW_GRDH_1SDV_20190807T043950_20190807T044019_017475_020DCA_CE96.zip" ] - + self.files = [ + "https://datapool.asf.alaska.edu/GRD_HD/SB/S1B_IW_GRDH_1SDV_20190807T045445_20190807T045510_017475_020DCD_A6EF.zip", + "https://datapool.asf.alaska.edu/GRD_HD/SB/S1B_IW_GRDH_1SDV_20190807T043950_20190807T044019_017475_020DCA_CE96.zip"] + # Local stash of cookies so we don't always have to ask - self.cookie_jar_path = os.path.join( os.path.expanduser('~'), ".bulk_download_cookiejar.txt") + self.cookie_jar_path = os.path.join(os.path.expanduser('~'), ".bulk_download_cookiejar.txt") self.cookie_jar = None - - self.asf_urs4 = { 'url': 'https://urs.earthdata.nasa.gov/oauth/authorize', - 'client': 'BO_n7nTIlMljdvU6kRRB3g', - 'redir': 'https://vertex-retired.daac.asf.alaska.edu/services/urs4_token_request'} - + + self.asf_urs4 = {'url': 'https://urs.earthdata.nasa.gov/oauth/authorize', + 'client': 'BO_n7nTIlMljdvU6kRRB3g', + 'redir': 'https://vertex-retired.daac.asf.alaska.edu/services/urs4_token_request'} + # Make sure we can write it our current directory if os.access(os.getcwd(), os.W_OK) is False: - print ("WARNING: Cannot write to current path! Check permissions for {0}".format(os.getcwd())) + print("WARNING: Cannot write to current path! Check permissions for {0}".format(os.getcwd())) exit(-1) - + # For SSL self.context = {} - + # Check if user handed in a Metalink or CSV: if len(sys.argv) > 0: download_files = [] @@ -108,489 +114,497 @@ def __init__(self): except AttributeError: # Python 2.6 won't complain about SSL Validation pass - + elif arg.endswith('.metalink') or arg.endswith('.csv'): - if os.path.isfile( arg ): - input_files.append( arg ) + if os.path.isfile(arg): + input_files.append(arg) if arg.endswith('.metalink'): new_files = self.process_metalink(arg) else: new_files = self.process_csv(arg) if new_files is not None: for file_url in (new_files): - download_files.append( file_url ) + download_files.append(file_url) else: - print (" > I cannot find the input file you specified: {0}".format(arg)) + print(" > I cannot find the input file you specified: {0}".format(arg)) else: - print (" > Command line argument '{0}' makes no sense, ignoring.".format(arg)) - + print(" > Command line argument '{0}' makes no sense, ignoring.".format(arg)) + if len(input_files) > 0: if len(download_files) > 0: - print (" > Processing {0} downloads from {1} input files. ".format(len(download_files), len(input_files))) + print(" > Processing {0} downloads from {1} input files. 
".format(len(download_files), + len(input_files))) self.files = download_files else: - print (" > I see you asked me to download files from {0} input files, but they had no downloads!".format(len(input_files))) - print (" > I'm super confused and exiting.") + print( + " > I see you asked me to download files from {0} input files, but they had no downloads!".format( + len(input_files))) + print(" > I'm super confused and exiting.") exit(-1) - + # Make sure cookie_jar is good to go! self.get_cookie() - - # summary + + # summary self.total_bytes = 0 self.total_time = 0 self.cnt = 0 self.success = [] self.failed = [] self.skipped = [] - - + # Get and validate a cookie def get_cookie(self): - if os.path.isfile(self.cookie_jar_path): - self.cookie_jar = MozillaCookieJar() - self.cookie_jar.load(self.cookie_jar_path) - - # make sure cookie is still valid - if self.check_cookie(): - print(" > Re-using previous cookie jar.") - return True - else: - print(" > Could not validate old cookie Jar") - - # We don't have a valid cookie, prompt user or creds - print ("No existing URS cookie found, please enter Earthdata username & password:") - print ("(Credentials will not be stored, saved or logged anywhere)") - - # Keep trying 'till user gets the right U:P - while self.check_cookie() is False: - self.get_new_cookie() - - return True - + if os.path.isfile(self.cookie_jar_path): + self.cookie_jar = MozillaCookieJar() + self.cookie_jar.load(self.cookie_jar_path) + + # make sure cookie is still valid + if self.check_cookie(): + print(" > Re-using previous cookie jar.") + return True + else: + print(" > Could not validate old cookie Jar") + + # We don't have a valid cookie, prompt user or creds + print("No existing URS cookie found, please enter Earthdata username & password:") + print("(Credentials will not be stored, saved or logged anywhere)") + + # Keep trying 'till user gets the right U:P + while self.check_cookie() is False: + self.get_new_cookie() + + return True + # Validate cookie before we begin def check_cookie(self): - - if self.cookie_jar is None: - print (" > Cookiejar is bunk: {0}".format(self.cookie_jar)) - return False - - # File we know is valid, used to validate cookie - file_check = 'https://urs.earthdata.nasa.gov/profile' - - # Apply custom Redirect Hanlder - opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) - install_opener(opener) - - # Attempt a HEAD request - request = Request(file_check) - request.get_method = lambda : 'HEAD' - try: - print (" > attempting to download {0}".format(file_check)) - response = urlopen(request, timeout=30) - resp_code = response.getcode() - # Make sure we're logged in - if not self.check_cookie_is_logged_in(self.cookie_jar): - return False - - # Save cookiejar - self.cookie_jar.save(self.cookie_jar_path) - - except HTTPError: - # If we ge this error, again, it likely means the user has not agreed to current EULA - print ("\nIMPORTANT: ") - print ("Your user appears to lack permissions to download data from the ASF Datapool.") - print ("\n\nNew users: you must first log into Vertex and accept the EULA. 
In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") - exit(-1) - - # This return codes indicate the USER has not been approved to download the data - if resp_code in (300, 301, 302, 303): - try: - redir_url = response.info().getheader('Location') - except AttributeError: - redir_url = response.getheader('Location') - - #Funky Test env: - if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']): - print ("Cough, cough. It's dusty in this test env!") - return True - - print ("Redirect ({0}) occured, invalid cookie value!".format(resp_code)) - return False - - # These are successes! - if resp_code in (200, 307): - return True - - return False - + + if self.cookie_jar is None: + print(" > Cookiejar is bunk: {0}".format(self.cookie_jar)) + return False + + # File we know is valid, used to validate cookie + file_check = 'https://urs.earthdata.nasa.gov/profile' + + # Apply custom Redirect Hanlder + opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) + install_opener(opener) + + # Attempt a HEAD request + request = Request(file_check) + request.get_method = lambda: 'HEAD' + try: + print(" > attempting to download {0}".format(file_check)) + response = urlopen(request, timeout=30) + resp_code = response.getcode() + # Make sure we're logged in + if not self.check_cookie_is_logged_in(self.cookie_jar): + return False + + # Save cookiejar + self.cookie_jar.save(self.cookie_jar_path) + + except HTTPError: + # If we ge this error, again, it likely means the user has not agreed to current EULA + print("\nIMPORTANT: ") + print("Your user appears to lack permissions to download data from the ASF Datapool.") + print( + "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") + exit(-1) + + # This return codes indicate the USER has not been approved to download the data + if resp_code in (300, 301, 302, 303): + try: + redir_url = response.info().getheader('Location') + except AttributeError: + redir_url = response.getheader('Location') + + # Funky Test env: + if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']): + print("Cough, cough. It's dusty in this test env!") + return True + + print("Redirect ({0}) occured, invalid cookie value!".format(resp_code)) + return False + + # These are successes! 
+ if resp_code in (200, 307): + return True + + return False + def get_new_cookie(self): - # Start by prompting user to input their credentials - - # Another Python2/3 workaround - try: - new_username = raw_input("Username: ") - except NameError: - new_username = input("Username: ") - new_password = getpass.getpass(prompt="Password (will not be displayed): ") - - # Build URS4 Cookie request - auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4['client'] + '&redirect_uri=' + self.asf_urs4['redir'] + '&response_type=code&state=' - - try: - #python2 - user_pass = base64.b64encode (bytes(new_username+":"+new_password)) - except TypeError: - #python3 - user_pass = base64.b64encode (bytes(new_username+":"+new_password, "utf-8")) - user_pass = user_pass.decode("utf-8") - - # Authenticate against URS, grab all the cookies - self.cookie_jar = MozillaCookieJar() - opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) - request = Request(auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)}) - - # Watch out cookie rejection! - try: - response = opener.open(request) - except HTTPError as e: - if e.code == 401: - print (" > Username and Password combo was not successful. Please try again.") - return False - else: - # If an error happens here, the user most likely has not confirmed EULA. - print ("\nIMPORTANT: There was an error obtaining a download cookie!") - print ("Your user appears to lack permission to download data from the ASF Datapool.") - print ("\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") - exit(-1) - except URLError as e: - print ("\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. ") - print ("Try cookie generation later.") - exit(-1) - - # Did we get a cookie? - if self.check_cookie_is_logged_in(self.cookie_jar): - #COOKIE SUCCESS! - self.cookie_jar.save(self.cookie_jar_path) - return True - - # if we aren't successful generating the cookie, nothing will work. Stop here! - print ("WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again.") - print ("Response was {0}.".format(response.getcode())) - print ("\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") - exit(-1) - + # Start by prompting user to input their credentials + + # Another Python2/3 workaround + try: + new_username = raw_input("Username: ") + except NameError: + new_username = input("Username: ") + new_password = getpass.getpass(prompt="Password (will not be displayed): ") + + # Build URS4 Cookie request + auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4['client'] + '&redirect_uri=' + \ + self.asf_urs4['redir'] + '&response_type=code&state=' + + try: + # python2 + user_pass = base64.b64encode(bytes(new_username + ":" + new_password)) + except TypeError: + # python3 + user_pass = base64.b64encode(bytes(new_username + ":" + new_password, "utf-8")) + user_pass = user_pass.decode("utf-8") + + # Authenticate against URS, grab all the cookies + self.cookie_jar = MozillaCookieJar() + opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) + request = Request(auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)}) + + # Watch out cookie rejection! 
+ try: + response = opener.open(request) + except HTTPError as e: + if e.code == 401: + print(" > Username and Password combo was not successful. Please try again.") + return False + else: + # If an error happens here, the user most likely has not confirmed EULA. + print("\nIMPORTANT: There was an error obtaining a download cookie!") + print("Your user appears to lack permission to download data from the ASF Datapool.") + print( + "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") + exit(-1) + except URLError as e: + print("\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. ") + print("Try cookie generation later.") + exit(-1) + + # Did we get a cookie? + if self.check_cookie_is_logged_in(self.cookie_jar): + # COOKIE SUCCESS! + self.cookie_jar.save(self.cookie_jar_path) + return True + + # if we aren't successful generating the cookie, nothing will work. Stop here! + print("WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again.") + print("Response was {0}.".format(response.getcode())) + print( + "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") + exit(-1) + # make sure we're logged into URS def check_cookie_is_logged_in(self, cj): - for cookie in cj: - if cookie.name == 'urs_user_already_logged': - # Only get this cookie if we logged in successfully! - return True - - return False - - + for cookie in cj: + if cookie.name == 'urs_user_already_logged': + # Only get this cookie if we logged in successfully! + return True + + return False + # Download the file def download_file_with_cookiejar(self, url, file_count, total, recursion=False): - # see if we've already download this file and if it is that it is the correct size - download_file = os.path.basename(url).split('?')[0] - if os.path.isfile(download_file): - try: - request = Request(url) - request.get_method = lambda : 'HEAD' - response = urlopen(request, timeout=30) - remote_size = self.get_total_size(response) - # Check that we were able to derive a size. - if remote_size: - local_size = os.path.getsize(download_file) - if remote_size < (local_size+(local_size*.01)) and remote_size > (local_size-(local_size*.01)): - print (" > Download file {0} exists! \n > Skipping download of {1}. ".format(download_file, url)) - return None,None - #partial file size wasn't full file size, lets blow away the chunk and start again - print (" > Found {0} but it wasn't fully downloaded. Removing file and downloading again.".format(download_file)) - os.remove(download_file) - - except ssl.CertificateError as e: - print (" > ERROR: {0}".format(e)) - print (" > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag") - return False,None - - except HTTPError as e: - if e.code == 401: - print (" > IMPORTANT: Your user may not have permission to download this type of data!") - else: - print (" > Unknown Error, Could not get file HEAD: {0}".format(e)) - - except URLError as e: - print ("URL Error (from HEAD): {0}, {1}".format( e.reason, url)) - if "ssl.c" in "{0}".format(e.reason): - print ("IMPORTANT: Remote location may not be accepting your SSL configuration. 
This is a terminal error.") - return False,None - - # attempt https connection - try: - request = Request(url) - response = urlopen(request, timeout=30) - - # Watch for redirect - if response.geturl() != url: - - # See if we were redirect BACK to URS for re-auth. - if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl(): - - if recursion: - print (" > Entering seemingly endless auth loop. Aborting. ") - return False, None - - # make this easier. If there is no app_type=401, add it - new_auth_url = response.geturl() - if "app_type" not in new_auth_url: - new_auth_url += "&app_type=401" - - print (" > While attempting to download {0}....".format(url)) - print (" > Need to obtain new cookie from {0}".format(new_auth_url)) - old_cookies = [cookie.name for cookie in self.cookie_jar] - opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) - request = Request(new_auth_url) - try: - response = opener.open(request) - for cookie in self.cookie_jar: - if cookie.name not in old_cookies: - print (" > Saved new cookie: {0}".format(cookie.name)) - - # A little hack to save session cookies - if cookie.discard: - cookie.expires = int(time.time()) + 60*60*24*30 - print (" > Saving session Cookie that should have been discarded! ") - - self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True) - except HTTPError as e: - print ("HTTP Error: {0}, {1}".format( e.code, url)) - return False,None - - # Okay, now we have more cookies! Lets try again, recursively! - print (" > Attempting download again with new cookies!") - return self.download_file_with_cookiejar(url, file_count, total, recursion=True) - - print (" > 'Temporary' Redirect download @ Remote archive:\n > {0}".format(response.geturl())) - - # seems to be working - print ("({0}/{1}) Downloading {2}".format(file_count, total, url)) - - # Open our local file for writing and build status bar - tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') - self.chunk_read(response, tf, report_hook=self.chunk_report) - - # Reset download status - sys.stdout.write('\n') - - tempfile_name = tf.name - tf.close() - - #handle errors - except HTTPError as e: - print ("HTTP Error: {0}, {1}".format( e.code, url)) - - if e.code == 401: - print (" > IMPORTANT: Your user does not have permission to download this type of data!") - - if e.code == 403: - print (" > Got a 403 Error trying to download this file. ") - print (" > You MAY need to log in this app and agree to a EULA. ") - - return False,None - - except URLError as e: - print ("URL Error (from GET): {0}, {1}, {2}".format(e, e.reason, url)) - if "ssl.c" in "{0}".format(e.reason): - print ("IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error.") - return False,None - - except ssl.CertificateError as e: - print (" > ERROR: {0}".format(e)) - print (" > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag") - return False,None - - # Return the file size - shutil.copy(tempfile_name, download_file) - os.remove(tempfile_name) - file_size = self.get_total_size(response) - actual_size = os.path.getsize(download_file) - if file_size is None: - # We were unable to calculate file size. 
- file_size = actual_size - return actual_size,file_size - + # see if we've already download this file and if it is that it is the correct size + download_file = os.path.basename(url).split('?')[0] + if os.path.isfile(download_file): + try: + request = Request(url) + request.get_method = lambda: 'HEAD' + response = urlopen(request, timeout=30) + remote_size = self.get_total_size(response) + # Check that we were able to derive a size. + if remote_size: + local_size = os.path.getsize(download_file) + if remote_size < (local_size + (local_size * .01)) and remote_size > ( + local_size - (local_size * .01)): + print(" > Download file {0} exists! \n > Skipping download of {1}. ".format(download_file, url)) + return None, None + # partial file size wasn't full file size, lets blow away the chunk and start again + print(" > Found {0} but it wasn't fully downloaded. Removing file and downloading again.".format( + download_file)) + os.remove(download_file) + + except ssl.CertificateError as e: + print(" > ERROR: {0}".format(e)) + print(" > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag") + return False, None + + except HTTPError as e: + if e.code == 401: + print(" > IMPORTANT: Your user may not have permission to download this type of data!") + else: + print(" > Unknown Error, Could not get file HEAD: {0}".format(e)) + + except URLError as e: + print("URL Error (from HEAD): {0}, {1}".format(e.reason, url)) + if "ssl.c" in "{0}".format(e.reason): + print( + "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error.") + return False, None + + # attempt https connection + try: + request = Request(url) + response = urlopen(request, timeout=30) + + # Watch for redirect + if response.geturl() != url: + + # See if we were redirect BACK to URS for re-auth. + if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl(): + + if recursion: + print(" > Entering seemingly endless auth loop. Aborting. ") + return False, None + + # make this easier. If there is no app_type=401, add it + new_auth_url = response.geturl() + if "app_type" not in new_auth_url: + new_auth_url += "&app_type=401" + + print(" > While attempting to download {0}....".format(url)) + print(" > Need to obtain new cookie from {0}".format(new_auth_url)) + old_cookies = [cookie.name for cookie in self.cookie_jar] + opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), + HTTPSHandler(**self.context)) + request = Request(new_auth_url) + try: + response = opener.open(request) + for cookie in self.cookie_jar: + if cookie.name not in old_cookies: + print(" > Saved new cookie: {0}".format(cookie.name)) + + # A little hack to save session cookies + if cookie.discard: + cookie.expires = int(time.time()) + 60 * 60 * 24 * 30 + print(" > Saving session Cookie that should have been discarded! ") + + self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True) + except HTTPError as e: + print("HTTP Error: {0}, {1}".format(e.code, url)) + return False, None + + # Okay, now we have more cookies! Lets try again, recursively! 
+ print(" > Attempting download again with new cookies!") + return self.download_file_with_cookiejar(url, file_count, total, recursion=True) + + print(" > 'Temporary' Redirect download @ Remote archive:\n > {0}".format(response.geturl())) + + # seems to be working + print("({0}/{1}) Downloading {2}".format(file_count, total, url)) + + # Open our local file for writing and build status bar + tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') + self.chunk_read(response, tf, report_hook=self.chunk_report) + + # Reset download status + sys.stdout.write('\n') + + tempfile_name = tf.name + tf.close() + + # handle errors + except HTTPError as e: + print("HTTP Error: {0}, {1}".format(e.code, url)) + + if e.code == 401: + print(" > IMPORTANT: Your user does not have permission to download this type of data!") + + if e.code == 403: + print(" > Got a 403 Error trying to download this file. ") + print(" > You MAY need to log in this app and agree to a EULA. ") + + return False, None + + except URLError as e: + print("URL Error (from GET): {0}, {1}, {2}".format(e, e.reason, url)) + if "ssl.c" in "{0}".format(e.reason): + print( + "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error.") + return False, None + + except ssl.CertificateError as e: + print(" > ERROR: {0}".format(e)) + print(" > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag") + return False, None + + # Return the file size + shutil.copy(tempfile_name, download_file) + os.remove(tempfile_name) + file_size = self.get_total_size(response) + actual_size = os.path.getsize(download_file) + if file_size is None: + # We were unable to calculate file size. + file_size = actual_size + return actual_size, file_size + def get_redirect_url_from_error(self, error): - find_redirect = re.compile(r"id=\"redir_link\"\s+href=\"(\S+)\"") - print ("error file was: {}".format(error)) - redirect_url = find_redirect.search(error) - if redirect_url: - print("Found: {0}".format(redirect_url.group(0))) - return (redirect_url.group(0)) - - return None - - + find_redirect = re.compile(r"id=\"redir_link\"\s+href=\"(\S+)\"") + print("error file was: {}".format(error)) + redirect_url = find_redirect.search(error) + if redirect_url: + print("Found: {0}".format(redirect_url.group(0))) + return (redirect_url.group(0)) + + return None + # chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_report(self, bytes_so_far, file_size): - if file_size is not None: - percent = float(bytes_so_far) / file_size - percent = round(percent*100, 2) - sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" % - (bytes_so_far, file_size, percent)) - else: - # We couldn't figure out the size. - sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far)) - + if file_size is not None: + percent = float(bytes_so_far) / file_size + percent = round(percent * 100, 2) + sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" % + (bytes_so_far, file_size, percent)) + else: + # We couldn't figure out the size. 
+ sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far)) + # chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_read(self, response, local_file, chunk_size=8192, report_hook=None): - file_size = self.get_total_size(response) - bytes_so_far = 0 - - while 1: - try: - chunk = response.read(chunk_size) - except: - sys.stdout.write("\n > There was an error reading data. \n") - break - - try: - local_file.write(chunk) - except TypeError: - local_file.write(chunk.decode(local_file.encoding)) - bytes_so_far += len(chunk) - - if not chunk: - break - - if report_hook: - report_hook(bytes_so_far, file_size) - - return bytes_so_far - + file_size = self.get_total_size(response) + bytes_so_far = 0 + + while 1: + try: + chunk = response.read(chunk_size) + except: + sys.stdout.write("\n > There was an error reading data. \n") + break + + try: + local_file.write(chunk) + except TypeError: + local_file.write(chunk.decode(local_file.encoding)) + bytes_so_far += len(chunk) + + if not chunk: + break + + if report_hook: + report_hook(bytes_so_far, file_size) + + return bytes_so_far + def get_total_size(self, response): - try: - file_size = response.info().getheader('Content-Length').strip() - except AttributeError: - try: - file_size = response.getheader('Content-Length').strip() - except AttributeError: - print ("> Problem getting size") - return None - - return int(file_size) - - + try: + file_size = response.info().getheader('Content-Length').strip() + except AttributeError: + try: + file_size = response.getheader('Content-Length').strip() + except AttributeError: + print("> Problem getting size") + return None + + return int(file_size) + # Get download urls from a metalink file def process_metalink(self, ml_file): - print ("Processing metalink file: {0}".format(ml_file)) - with open(ml_file, 'r') as ml: - xml = ml.read() - - # Hack to remove annoying namespace - it = ET.iterparse(StringIO(xml)) - for _, el in it: - if '}' in el.tag: - el.tag = el.tag.split('}', 1)[1] # strip all namespaces - root = it.root - - dl_urls = [] - ml_files = root.find('files') - for dl in ml_files: - dl_urls.append(dl.find('resources').find('url').text) - - if len(dl_urls) > 0: - return dl_urls - else: - return None - + print("Processing metalink file: {0}".format(ml_file)) + with open(ml_file, 'r') as ml: + xml = ml.read() + + # Hack to remove annoying namespace + it = ET.iterparse(StringIO(xml)) + for _, el in it: + if '}' in el.tag: + el.tag = el.tag.split('}', 1)[1] # strip all namespaces + root = it.root + + dl_urls = [] + ml_files = root.find('files') + for dl in ml_files: + dl_urls.append(dl.find('resources').find('url').text) + + if len(dl_urls) > 0: + return dl_urls + else: + return None + # Get download urls from a csv file def process_csv(self, csv_file): - print ("Processing csv file: {0}".format(csv_file)) - - dl_urls = [] - with open(csv_file, 'r') as csvf: - try: - csvr = csv.DictReader(csvf) - for row in csvr: - dl_urls.append(row['URL']) - except csv.Error as e: - print ("WARNING: Could not parse file %s, line %d: %s. Skipping." % (csv_file, csvr.line_num, e)) - return None - except KeyError as e: - print ("WARNING: Could not find URL column in file %s. Skipping." 
% (csv_file)) - - if len(dl_urls) > 0: - return dl_urls - else: - return None + print("Processing csv file: {0}".format(csv_file)) + + dl_urls = [] + with open(csv_file, 'r') as csvf: + try: + csvr = csv.DictReader(csvf) + for row in csvr: + dl_urls.append(row['URL']) + except csv.Error as e: + print("WARNING: Could not parse file %s, line %d: %s. Skipping." % (csv_file, csvr.line_num, e)) + return None + except KeyError as e: + print("WARNING: Could not find URL column in file %s. Skipping." % (csv_file)) + + if len(dl_urls) > 0: + return dl_urls + else: + return None # Download all the files in the list def download_files(self): for file_name in self.files: - + # make sure we haven't ctrl+c'd or some other abort trap if abort == True: - raise SystemExit - + raise SystemExit + # download counter self.cnt += 1 - + # set a timer start = time.time() - + # run download - size,total_size = self.download_file_with_cookiejar(file_name, self.cnt, len(self.files)) - + size, total_size = self.download_file_with_cookiejar(file_name, self.cnt, len(self.files)) + # calculte rate end = time.time() - + # stats: if size is None: self.skipped.append(file_name) # Check to see that the download didn't error and is the correct size - elif size is not False and (total_size < (size+(size*.01)) and total_size > (size-(size*.01))): + elif size is not False and (total_size < (size + (size * .01)) and total_size > (size - (size * .01))): # Download was good! elapsed = end - start elapsed = 1.0 if elapsed < 1 else elapsed - rate = (size/1024**2)/elapsed - - print ("Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec".format(size, elapsed, rate)) - + rate = (size / 1024 ** 2) / elapsed + + print("Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec".format(size, elapsed, rate)) + # add up metrics self.total_bytes += size self.total_time += elapsed - self.success.append( {'file':file_name, 'size':size } ) - + self.success.append({'file': file_name, 'size': size}) + else: - print ("There was a problem downloading {0}".format(file_name)) + print("There was a problem downloading {0}".format(file_name)) self.failed.append(file_name) - + def print_summary(self): # Print summary: - print ("\n\nDownload Summary ") - print ("--------------------------------------------------------------------------------") - print (" Successes: {0} files, {1} bytes ".format(len(self.success), self.total_bytes)) + print("\n\nDownload Summary ") + print("--------------------------------------------------------------------------------") + print(" Successes: {0} files, {1} bytes ".format(len(self.success), self.total_bytes)) for success_file in self.success: - print (" - {0} {1:.2f}MB".format(success_file['file'],(success_file['size']/1024.0**2))) + print(" - {0} {1:.2f}MB".format(success_file['file'], (success_file['size'] / 1024.0 ** 2))) if len(self.failed) > 0: - print (" Failures: {0} files".format(len(self.failed))) - for failed_file in self.failed: - print (" - {0}".format(failed_file)) + print(" Failures: {0} files".format(len(self.failed))) + for failed_file in self.failed: + print(" - {0}".format(failed_file)) if len(self.skipped) > 0: - print (" Skipped: {0} files".format(len(self.skipped))) - for skipped_file in self.skipped: - print (" - {0}".format(skipped_file)) + print(" Skipped: {0} files".format(len(self.skipped))) + for skipped_file in self.skipped: + print(" - {0}".format(skipped_file)) if len(self.success) > 0: - print (" Average Rate: {0:.2f}MB/sec".format( (self.total_bytes/1024.0**2)/self.total_time)) - 
print ("--------------------------------------------------------------------------------") - + print(" Average Rate: {0:.2f}MB/sec".format((self.total_bytes / 1024.0 ** 2) / self.total_time)) + print("--------------------------------------------------------------------------------") + if __name__ == "__main__": # Setup a signal trap for SIGINT (Ctrl+C) signal.signal(signal.SIGINT, signal_handler) - + downloader = bulk_downloader() downloader.download_files() downloader.print_summary() From c16f23a9277427607eafa1e039638487b39f8fc0 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Wed, 7 Aug 2019 16:32:41 +0200 Subject: [PATCH 3/8] [asf_template.py] replaced file list with placeholder for replacement --- sentinel_api/asf_template.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sentinel_api/asf_template.py b/sentinel_api/asf_template.py index b00543c..ed9a462 100755 --- a/sentinel_api/asf_template.py +++ b/sentinel_api/asf_template.py @@ -80,9 +80,7 @@ def signal_handler(sig, frame): class bulk_downloader: def __init__(self): # List of files to download - self.files = [ - "https://datapool.asf.alaska.edu/GRD_HD/SB/S1B_IW_GRDH_1SDV_20190807T045445_20190807T045510_017475_020DCD_A6EF.zip", - "https://datapool.asf.alaska.edu/GRD_HD/SB/S1B_IW_GRDH_1SDV_20190807T043950_20190807T044019_017475_020DCA_CE96.zip"] + self.files = ['placeholder_files'] # Local stash of cookies so we don't always have to ask self.cookie_jar_path = os.path.join(os.path.expanduser('~'), ".bulk_download_cookiejar.txt") From 99c0355d4e42cebf971548153d69416080d807ae Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Wed, 7 Aug 2019 16:34:09 +0200 Subject: [PATCH 4/8] [asf_template.py] new variable targetdir with placeholder string value so that files can be downloaded to specific directory instead of just the location of the script --- sentinel_api/asf_template.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sentinel_api/asf_template.py b/sentinel_api/asf_template.py index ed9a462..99b1896 100755 --- a/sentinel_api/asf_template.py +++ b/sentinel_api/asf_template.py @@ -90,9 +90,11 @@ def __init__(self): 'client': 'BO_n7nTIlMljdvU6kRRB3g', 'redir': 'https://vertex-retired.daac.asf.alaska.edu/services/urs4_token_request'} + self.targetdir = 'placeholder_targetdir' + # Make sure we can write it our current directory - if os.access(os.getcwd(), os.W_OK) is False: - print("WARNING: Cannot write to current path! Check permissions for {0}".format(os.getcwd())) + if os.access(self.targetdir, os.W_OK) is False: + print("WARNING: Cannot write to current path! 
Check permissions for {0}".format(self.targetdir))
             exit(-1)
 
         # For SSL
@@ -302,7 +304,8 @@ def check_cookie_is_logged_in(self, cj):
     # Download the file
     def download_file_with_cookiejar(self, url, file_count, total, recursion=False):
         # see if we've already download this file and if it is that it is the correct size
-        download_file = os.path.basename(url).split('?')[0]
+        download_file_base = os.path.basename(url).split('?')[0]
+        download_file = os.path.join(self.targetdir, download_file_base)
         if os.path.isfile(download_file):
             try:
                 request = Request(url)
@@ -391,7 +394,7 @@ def download_file_with_cookiejar(self, url, file_count, total, recursion=False):
             print("({0}/{1}) Downloading {2}".format(file_count, total, url))
 
             # Open our local file for writing and build status bar
-            tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.')
+            tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir=self.targetdir)
             self.chunk_read(response, tf, report_hook=self.chunk_report)
 
             # Reset download status

From 04f059e73ea58d493541428fc259bc53abd492b6 Mon Sep 17 00:00:00 2001
From: John Truckenbrodt
Date: Wed, 7 Aug 2019 17:39:35 +0200
Subject: [PATCH 5/8] [SentinelDownloader._write_download_asf] new method

---
 sentinel_api/sentinel_api.py | 37 +++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py
index c4a9e2f..f023f2e 100644
--- a/sentinel_api/sentinel_api.py
+++ b/sentinel_api/sentinel_api.py
@@ -541,7 +541,42 @@ def _write_download_urls(self, filename):
             for scene in self.__scenes:
                 outfile.write(scene['url'] + '\n')
         return filename
-    
+
+    def _write_download_asf(self, filename):
+        template = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'asf_template.py')
+        
+        with open(template, 'r') as temp:
+            content = temp.read()
+        pattern = r'^(?P<sensor>S1[AB])_' \
+                  r'(?P<beam>S1|S2|S3|S4|S5|S6|IW|EW|WV|EN|N1|N2|N3|N4|N5|N6|IM)_' \
+                  r'(?P<product>SLC|GRD|OCN)' \
+                  r'(?P<subproduct>[FHM_])'
+        errormessage = '[ASF writer] unknown product: {}'
+        targets = []
+        for scene in self.__scenes:
+            title = scene['title']
+            match = re.search(pattern, title)
+            if match:
+                meta = match.groupdict()
+                url = 'https://datapool.asf.alaska.edu'
+                if meta['product'] == 'SLC':
+                    url += '/SLC'
+                elif meta['product'] == 'GRD':
+                    url += '/GRD_{}D'.format(meta['subproduct'])
+                else:
+                    raise RuntimeError(errormessage.format(title))
+                url += re.sub(r'(S)1([AB])', r'/\1\2/', meta['sensor'])
+                url += title + '.zip'
+                targets.append(url)
+            else:
+                raise RuntimeError(errormessage.format(title))
+        linebreak = '\n{}"'.format(' ' * 12)
+        filestring = ('",' + linebreak).join(targets)
+        replacement = linebreak + filestring + '"'
+        content = content.replace("'placeholder_files'", replacement)
+        content = content.replace("placeholder_targetdir", self.__download_dir)
+        with open(filename, 'w') as out:
+            out.write(content)
 
 ###########################################################
 # Example use of class

From f1d8b171746d137c0aa5e4e2087f3cc08e426831 Mon Sep 17 00:00:00 2001
From: John Truckenbrodt
Date: Wed, 7 Aug 2019 17:40:02 +0200
Subject: [PATCH 6/8] [SentinelDownloader.write_results] offer new option 'asf'

---
 sentinel_api/sentinel_api.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py
index f023f2e..06b0842 100644
--- a/sentinel_api/sentinel_api.py
+++ b/sentinel_api/sentinel_api.py
@@ -226,6 +226,8 @@ def write_results(self, file_type, filename, output=False):
             self._write_download_wget(filename)
elif file_type == 'json': self._write_json(filename) + elif file_type == 'asf': + self._write_download_asf(filename) else: self._write_download_urls(filename) From bdd8bd1e7ecf1c8ea12f667893b6646b3a9e53b4 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Thu, 17 Oct 2019 15:35:56 +0200 Subject: [PATCH 7/8] [SentinelDownloader] sorted methods alphabetically --- sentinel_api/sentinel_api.py | 540 ++++++++++++++++++----------------- 1 file changed, 271 insertions(+), 269 deletions(-) diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py index 6c6ab0b..0b3c18d 100644 --- a/sentinel_api/sentinel_api.py +++ b/sentinel_api/sentinel_api.py @@ -19,7 +19,6 @@ import requests from osgeo import ogr -ogr.UseExceptions() from spatialist.vector import Vector, wkt2vector, intersect @@ -28,6 +27,8 @@ import zipfile as zf from datetime import datetime, date +ogr.UseExceptions() + class SentinelDownloader(object): """Class to search and download for Sentinel data""" @@ -46,60 +47,87 @@ def __init__(self, username, password, api_url='https://scihub.copernicus.eu/api self.__esa_username = username self.__esa_password = password - def set_download_dir(self, download_dir): - """Set directory for check against existing downloaded files and as directory where to download - - Args: - download_dir: Path to directory - - """ - print('Setting download directory to %s' % download_dir) - if not os.path.exists(download_dir): - os.makedirs(download_dir) - - self.__download_dir = download_dir - - def set_data_dir(self, data_dir): - """Set directory for check against existing downloaded files; this can be repeated multiple times to create a list of data directories + def download_all(self, download_dir=None): + """Download all scenes Args: - data_dir: Path to directory - - """ - print('Adding data directory {}'.format(data_dir)) - self.__data_dirs.append(data_dir) - - def set_geometries(self, geometries): - """Manually set one or more geometries for data search + download_dir: Define a directory where to download the scenes + (Default: Use default from class -> current directory) - Args: - geometries: String or List representation of one or more Wkt Geometries, - Geometries have to be in Lat/Lng, EPSG:4326 projection! 
+ Returns: + Dictionary of failed ('failed') and successfully ('success') downloaded scenes """ - # print('Set geometries:') - # print(geometries) - if isinstance(geometries, list): - self.__geometries = geometries + if download_dir is None: + download_dir = self.__download_dir - elif isinstance(geometries, str): - self.__geometries = [geometries] + downloaded = [] + downloaded_failed = [] - else: - raise Exception('geometries parameter needs to be a list or a string') + for scene in self.__scenes: + url = scene['url'] + filename = scene['title'] + '.zip' + path = os.path.join(download_dir, filename) + print('===========================================================') + print('Download file path: %s' % path) + + try: + response = requests.get(url, auth=(self.__esa_username, self.__esa_password), stream=True) + except requests.exceptions.ConnectionError: + print('Connection Error') + continue + if 'Content-Length' not in response.headers: + print('Content-Length not found') + print(url) + continue + size = int(response.headers['Content-Length'].strip()) + if size < 1000000: + print('The found scene is too small: %s (%s)' % (scene['title'], size)) + print(url) + continue + + print('Size of the scene: %s MB' % (size / 1024 / 1024)) # show in MegaBytes + my_bytes = 0 + widgets = ["Downloading: ", pb.Bar(marker="*", left="[", right=" "), + pb.Percentage(), " ", pb.FileTransferSpeed(), "] ", + " of {0}MB".format(str(round(size / 1024 / 1024, 2))[:4])] + pbar = pb.ProgressBar(widgets=widgets, maxval=size).start() + + try: + down = open(path, 'wb') + for buf in response.iter_content(1024): + if buf: + down.write(buf) + my_bytes += len(buf) + pbar.update(my_bytes) + pbar.finish() + down.close() + except KeyboardInterrupt: + print("\nKeyboard interruption, remove current download and exit execution of script") + os.remove(path) + sys.exit(0) + + # Check if file is valid + print("Check if file is valid: ") + valid = self._is_valid(path) + + if not valid: + downloaded_failed.append(path) + print('invalid file is being deleted.') + os.remove(path) + else: + downloaded.append(path) - # Test first geometry - try: - vec = wkt2vector(self.__geometries[0], srs=4326) - except RuntimeError as e: - raise Exception('The first geometry is not valid! 
Error: %s' % e) - finally: - vec = None + return {'success': downloaded, 'failed': downloaded_failed} def get_geometries(self): """Return list of geometries""" return self.__geometries + def get_scenes(self): + """Return searched and filtered scenes""" + return self.__scenes + def load_sites(self, input_file): """ Load features from input file and transform geometries to Lat/Lon (EPSG 4326) @@ -117,6 +145,22 @@ def load_sites(self, input_file): print('Found %s features' % len(self.__geometries)) + @staticmethod + def multipolygon2list(wkt): + geom = ogr.CreateGeometryFromWkt(wkt) + if geom.GetGeometryName() == 'MULTIPOLYGON': + return [x.ExportToWkt() for x in geom] + else: + return [geom.ExportToWkt()] + + def print_scenes(self): + """Print title of searched and filtered scenes""" + + def sorter(x): return re.findall('[0-9T]{15}', x)[0] + + titles = sorted([x['title'] for x in self.__scenes], key=sorter) + print('\n'.join(titles)) + def search(self, platform, min_overlap=0.001, download_dir=None, start_date=None, end_date=None, date_type='beginPosition', **keywords): """Search in ESA Data Hub for scenes with given arguments @@ -198,17 +242,55 @@ def search(self, platform, min_overlap=0.001, download_dir=None, start_date=None print('%s total scenes after merging' % len(self.__scenes)) print('===========================================================') - def get_scenes(self): - """Return searched and filtered scenes""" - return self.__scenes + def set_data_dir(self, data_dir): + """Set directory for check against existing downloaded files; this can be repeated multiple times to create a list of data directories + + Args: + data_dir: Path to directory + + """ + print('Adding data directory {}'.format(data_dir)) + self.__data_dirs.append(data_dir) - def print_scenes(self): - """Print title of searched and filtered scenes""" + def set_download_dir(self, download_dir): + """Set directory for check against existing downloaded files and as directory where to download + + Args: + download_dir: Path to directory + + """ + print('Setting download directory to %s' % download_dir) + if not os.path.exists(download_dir): + os.makedirs(download_dir) - def sorter(x): return re.findall('[0-9T]{15}', x)[0] + self.__download_dir = download_dir + + def set_geometries(self, geometries): + """Manually set one or more geometries for data search + + Args: + geometries: String or List representation of one or more Wkt Geometries, + Geometries have to be in Lat/Lng, EPSG:4326 projection! + + """ + # print('Set geometries:') + # print(geometries) + if isinstance(geometries, list): + self.__geometries = geometries - titles = sorted([x['title'] for x in self.__scenes], key=sorter) - print('\n'.join(titles)) + elif isinstance(geometries, str): + self.__geometries = [geometries] + + else: + raise Exception('geometries parameter needs to be a list or a string') + + # Test first geometry + try: + vec = wkt2vector(self.__geometries[0], srs=4326) + except RuntimeError as e: + raise Exception('The first geometry is not valid! 
Error: %s' % e) + finally: + vec = None def write_results(self, file_type, filename, output=False): """Write results to disk in different kind of formats @@ -233,78 +315,83 @@ def write_results(self, file_type, filename, output=False): with open(filename, 'r') as infile: print(infile.read()) - def download_all(self, download_dir=None): - """Download all scenes + def _filter_existing(self, scenes): + """Filter scenes based on existing files in the define download directory and all further data directories Args: - download_dir: Define a directory where to download the scenes - (Default: Use default from class -> current directory) + scenes: List of scenes to be filtered Returns: - Dictionary of failed ('failed') and successfully ('success') downloaded scenes + Filtered list of scenes """ - if download_dir is None: - download_dir = self.__download_dir - - downloaded = [] - downloaded_failed = [] + filtered = [] + dirs = self.__data_dirs + [self.__download_dir] + for scene in scenes: + exist = [os.path.isfile(os.path.join(dir, scene['title'] + '.zip')) for dir in dirs] + if not any(exist): + filtered.append(scene) + return filtered + + @staticmethod + def _filter_overlap(scenes, wkt_geometry, min_overlap=0.001): + """Filter scenes based on the minimum overlap to the area of interest + + Args: + scenes: List of scenes to filter + wkt_geometry: Wkt Geometry representation of the area of interest + min_overlap: Minimum overlap (0-1) in decimal format between scene geometry and area of interest + + Returns: + Filtered list of scenes + + """ + filtered = [] - for scene in self.__scenes: - url = scene['url'] - filename = scene['title'] + '.zip' - path = os.path.join(download_dir, filename) - print('===========================================================') - print('Download file path: %s' % path) - - try: - response = requests.get(url, auth=(self.__esa_username, self.__esa_password), stream=True) - except requests.exceptions.ConnectionError: - print('Connection Error') - continue - if 'Content-Length' not in response.headers: - print('Content-Length not found') - print(url) - continue - size = int(response.headers['Content-Length'].strip()) - if size < 1000000: - print('The found scene is too small: %s (%s)' % (scene['title'], size)) - print(url) - continue + with wkt2vector(wkt_geometry, srs=4326) as vec1: + site_area = vec1.getArea() + for scene in scenes: + with wkt2vector(scene['footprint'], srs=4326) as vec2: + footprint_area = vec2.getArea() + inter = intersect(vec1, vec2) + if inter is not None: + intersect_area = inter.getArea() + overlap = intersect_area / site_area + inter.close() + else: + overlap = 0 + if overlap > min_overlap or ( + site_area / footprint_area > 1 and intersect_area / footprint_area > min_overlap): + scene['_script_overlap'] = overlap * 100 + filtered.append(scene) - print('Size of the scene: %s MB' % (size / 1024 / 1024)) # show in MegaBytes - my_bytes = 0 - widgets = ["Downloading: ", pb.Bar(marker="*", left="[", right=" "), - pb.Percentage(), " ", pb.FileTransferSpeed(), "] ", - " of {0}MB".format(str(round(size / 1024 / 1024, 2))[:4])] - pbar = pb.ProgressBar(widgets=widgets, maxval=size).start() - - try: - down = open(path, 'wb') - for buf in response.iter_content(1024): - if buf: - down.write(buf) - my_bytes += len(buf) - pbar.update(my_bytes) - pbar.finish() - down.close() - except KeyboardInterrupt: - print("\nKeyboard interruption, remove current download and exit execution of script") - os.remove(path) - sys.exit(0) - - # Check if file is valid - 
print("Check if file is valid: ") - valid = self._is_valid(path) - - if not valid: - downloaded_failed.append(path) - print('invalid file is being deleted.') - os.remove(path) - else: - downloaded.append(path) + return filtered + + def _format_url(self, startindex, wkt_geometry, platform, date_filtering, **keywords): + """Format the search URL based on the arguments + + Args: + wkt_geometry: Geometry in Wkt representation + platform: Satellite to search in + dateFiltering: filter of dates + **keywords: Further search parameters from ESA Data Hub + + Returns: + url: String URL to search for this data + + """ + with wkt2vector(wkt_geometry, srs=4326) as vec: + bbox = vec.bbox().convert2wkt()[0] - return {'success': downloaded, 'failed': downloaded_failed} + query_area = ' AND (footprint:"Intersects(%s)")' % bbox + filters = '' + for kw in sorted(keywords.keys()): + filters += ' AND (%s:%s)' % (kw, keywords[kw]) + + url = os.path.join(self.__esa_api_url, + 'search?format=json&rows=100&start=%s&q=%s%s%s%s' % + (startindex, platform, date_filtering, query_area, filters)) + return url @staticmethod def _is_valid(zipfile, minsize=1000000): @@ -335,64 +422,27 @@ def _is_valid(zipfile, minsize=1000000): print('file seems to be valid.') return not corrupt - def _format_url(self, startindex, wkt_geometry, platform, date_filtering, **keywords): - """Format the search URL based on the arguments - - Args: - wkt_geometry: Geometry in Wkt representation - platform: Satellite to search in - dateFiltering: filter of dates - **keywords: Further search parameters from ESA Data Hub - - Returns: - url: String URL to search for this data - - """ - with wkt2vector(wkt_geometry, srs=4326) as vec: - bbox = vec.bbox().convert2wkt()[0] - - query_area = ' AND (footprint:"Intersects(%s)")' % bbox - filters = '' - for kw in sorted(keywords.keys()): - filters += ' AND (%s:%s)' % (kw, keywords[kw]) - - url = os.path.join(self.__esa_api_url, - 'search?format=json&rows=100&start=%s&q=%s%s%s%s' % - (startindex, platform, date_filtering, query_area, filters)) - return url - @staticmethod - def multipolygon2list(wkt): - geom = ogr.CreateGeometryFromWkt(wkt) - if geom.GetGeometryName() == 'MULTIPOLYGON': - return [x.ExportToWkt() for x in geom] - else: - return [geom.ExportToWkt()] - - def _search_request(self, url): - """Do the HTTP request to ESA Data Hub + def _merge_scenes(scenes1, scenes2): + """Merge scenes from two different lists using the 'id' keyword Args: - url: HTTP URL to request + scenes1: List of prior available scenes + scenes2: List of new scenes Returns: - List of scenes (result from _parseJSON method), empty list if an error occurred + Merged list of scenes """ - try: - content = requests.get(url, auth=(self.__esa_username, self.__esa_password), verify=True) - if not content.status_code // 100 == 2: - print('Error: API returned unexpected response {}:'.format(content.status_code)) - print(content.text) - return [] - result = self._parse_json(content.json()) - for item in result: - item['footprint'] = self.multipolygon2list(item['footprint'])[0] - return result + existing_ids = [] + for scene in scenes1: + existing_ids.append(scene['id']) - except requests.exceptions.RequestException as exc: - print('Error: {}'.format(exc)) - return [] + for scene in scenes2: + if not scene['id'] in existing_ids: + scenes1.append(scene) + + return scenes1 @staticmethod def _parse_json(obj): @@ -433,122 +483,34 @@ def _parse_json(obj): return scenes_dict - def _filter_existing(self, scenes): - """Filter scenes based on existing 
files in the define download directory and all further data directories - - Args: - scenes: List of scenes to be filtered - - Returns: - Filtered list of scenes - - """ - filtered = [] - dirs = self.__data_dirs + [self.__download_dir] - for scene in scenes: - exist = [os.path.isfile(os.path.join(dir, scene['title'] + '.zip')) for dir in dirs] - if not any(exist): - filtered.append(scene) - return filtered - - @staticmethod - def _filter_overlap(scenes, wkt_geometry, min_overlap=0.001): - """Filter scenes based on the minimum overlap to the area of interest - - Args: - scenes: List of scenes to filter - wkt_geometry: Wkt Geometry representation of the area of interest - min_overlap: Minimum overlap (0-1) in decimal format between scene geometry and area of interest - - Returns: - Filtered list of scenes - - """ - filtered = [] - - with wkt2vector(wkt_geometry, srs=4326) as vec1: - site_area = vec1.getArea() - for scene in scenes: - with wkt2vector(scene['footprint'], srs=4326) as vec2: - footprint_area = vec2.getArea() - inter = intersect(vec1, vec2) - if inter is not None: - intersect_area = inter.getArea() - overlap = intersect_area / site_area - inter.close() - else: - overlap = 0 - if overlap > min_overlap or ( - site_area / footprint_area > 1 and intersect_area / footprint_area > min_overlap): - scene['_script_overlap'] = overlap * 100 - filtered.append(scene) - - return filtered - - @staticmethod - def _merge_scenes(scenes1, scenes2): - """Merge scenes from two different lists using the 'id' keyword + def _search_request(self, url): + """Do the HTTP request to ESA Data Hub Args: - scenes1: List of prior available scenes - scenes2: List of new scenes + url: HTTP URL to request Returns: - Merged list of scenes + List of scenes (result from _parseJSON method), empty list if an error occurred """ - existing_ids = [] - for scene in scenes1: - existing_ids.append(scene['id']) - - for scene in scenes2: - if not scene['id'] in existing_ids: - scenes1.append(scene) + try: + content = requests.get(url, auth=(self.__esa_username, self.__esa_password), verify=True) + if not content.status_code // 100 == 2: + print('Error: API returned unexpected response {}:'.format(content.status_code)) + print(content.text) + return [] + result = self._parse_json(content.json()) + for item in result: + item['footprint'] = self.multipolygon2list(item['footprint'])[0] + return result - return scenes1 - - def _write_json(self, filename): - """Write JSON representation of scenes list to file - - Args: - filename: Path to file to write in - - """ - with open(filename, 'w') as outfile: - json.dump(self.__scenes, outfile) - return True - - def _write_download_wget(self, filename): - """Write bash file to download scene URLs based on wget software - Please note: User authentication to ESA Data Hub (username, password) is being stored in plain text! 
- - Args: - filename: Path to file to write in - - """ - with open(filename, 'w') as outfile: - for scene in self.__scenes: - out = 'wget -c -T120 --no-check-certificate --user="{}" --password="{}" -O {}.zip "{}"\n'\ - .format(self.__esa_username, self.__esa_password, - os.path.join(self.__download_dir, scene['title']), scene['url'].replace('$', '\$')) - - outfile.write(out) - - def _write_download_urls(self, filename): - """Write URLs of scenes to text file - - Args: - filename: Path to file to write in - - """ - with open(filename, 'w') as outfile: - for scene in self.__scenes: - outfile.write(scene['url'] + '\n') - return filename + except requests.exceptions.RequestException as exc: + print('Error: {}'.format(exc)) + return [] def _write_download_asf(self, filename): template = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'asf_template.py') - + with open(template, 'r') as temp: content = temp.read() pattern = r'^(?PS1[AB])_' \ @@ -581,6 +543,46 @@ def _write_download_asf(self, filename): content = content.replace("placeholder_targetdir", self.__download_dir) with open(filename, 'w') as out: out.write(content) + + def _write_download_urls(self, filename): + """Write URLs of scenes to text file + + Args: + filename: Path to file to write in + + """ + with open(filename, 'w') as outfile: + for scene in self.__scenes: + outfile.write(scene['url'] + '\n') + return filename + + def _write_download_wget(self, filename): + """Write bash file to download scene URLs based on wget software + Please note: User authentication to ESA Data Hub (username, password) is being stored in plain text! + + Args: + filename: Path to file to write in + + """ + with open(filename, 'w') as outfile: + for scene in self.__scenes: + out = 'wget -c -T120 --no-check-certificate --user="{}" --password="{}" -O {}.zip "{}"\n' \ + .format(self.__esa_username, self.__esa_password, + os.path.join(self.__download_dir, scene['title']), scene['url'].replace('$', '\$')) + + outfile.write(out) + + def _write_json(self, filename): + """Write JSON representation of scenes list to file + + Args: + filename: Path to file to write in + + """ + with open(filename, 'w') as outfile: + json.dump(self.__scenes, outfile) + return True + ########################################################### # Example use of class From 4544ac918f10666f52c8a9608687648dc841ee27 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Thu, 17 Oct 2019 15:40:36 +0200 Subject: [PATCH 8/8] [SentinelDownloader.write_results] extended docstring to new option --- sentinel_api/sentinel_api.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py index 0b3c18d..79e094c 100644 --- a/sentinel_api/sentinel_api.py +++ b/sentinel_api/sentinel_api.py @@ -296,8 +296,11 @@ def write_results(self, file_type, filename, output=False): """Write results to disk in different kind of formats Args: - file_type: Use 'wget' to write download bash file with wget software, 'json' to write the dictionary object - to file, or 'url' to write a file with downloadable URLs + file_type: the file format to use: + - 'wget': download bash file with wget software + - 'json': write the dictionary object + - 'url': a file with downloadable URLs + - 'asf': a Python script for download from ASF Vertex filename: Path to file output: If True the written file will also be send to stdout (Default: False)
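
For reference, the scene-title-to-URL mapping introduced in [PATCH 5/8] can be exercised on its own. The sketch below mirrors the logic of _write_download_asf for a single, made-up GRD_HD title; the regex group names 'sensor', 'product' and 'subproduct' follow the keys read via groupdict() in the patch, while 'beam' is an assumed name for the one group the method never reads.

# Illustration only (not part of the patch series): map one Sentinel-1 title
# to its ASF datapool URL the way _write_download_asf does.
import re

# hypothetical scene title for demonstration
title = 'S1A_IW_GRDH_1SDV_20191017T053000_20191017T053025_018000_021AAA_ABCD'

pattern = r'^(?P<sensor>S1[AB])_' \
          r'(?P<beam>S1|S2|S3|S4|S5|S6|IW|EW|WV|EN|N1|N2|N3|N4|N5|N6|IM)_' \
          r'(?P<product>SLC|GRD|OCN)' \
          r'(?P<subproduct>[FHM_])'

meta = re.search(pattern, title).groupdict()

url = 'https://datapool.asf.alaska.edu'
if meta['product'] == 'SLC':
    url += '/SLC'
elif meta['product'] == 'GRD':
    url += '/GRD_{}D'.format(meta['subproduct'])             # GRDH -> GRD_HD
url += re.sub(r'(S)1([AB])', r'/\1\2/', meta['sensor'])      # S1A -> /SA/
url += title + '.zip'

print(url)
# https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20191017T053000_20191017T053025_018000_021AAA_ABCD.zip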
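
End to end, the new writer plugs into the existing SentinelDownloader workflow: search the ESA hub as before, then emit a self-contained bulk download script in addition to the wget/url/json outputs. A minimal usage sketch, assuming a package-level import and placeholder credentials, geometry and search settings:

# Hypothetical usage of the 'asf' output option added in [PATCH 6/8];
# username, password, polygon and search arguments are placeholders.
from sentinel_api import SentinelDownloader

s1 = SentinelDownloader('esa_username', 'esa_password')
s1.set_geometries('POLYGON ((11.0 50.8, 11.4 50.8, 11.4 51.0, 11.0 51.0, 11.0 50.8))')
s1.set_download_dir('/path/to/downloads')   # substituted for 'placeholder_targetdir'
s1.search('S1A*', min_overlap=0.3)

# writes a stand-alone script based on asf_template.py in which the found scene
# URLs replace 'placeholder_files'; run it separately with Earthdata credentials
s1.write_results(file_type='asf', filename='asf_download.py')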