From 138364f7ae49178e4b0c56665381cb9269237d8a Mon Sep 17 00:00:00 2001 From: Pieter van der Meulen Date: Mon, 12 Oct 2015 16:35:48 +0200 Subject: [PATCH 1/3] Don't throw and thus trigger the @retry decorator when a local file cannot be found, exit immediately --- src/pyff/mdrepo.py | 2 ++ src/pyff/utils.py | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/pyff/mdrepo.py b/src/pyff/mdrepo.py index 7497aaef..3692ae9e 100644 --- a/src/pyff/mdrepo.py +++ b/src/pyff/mdrepo.py @@ -474,6 +474,8 @@ def _process_url(rurl, verifier, tid, post, enable_cache=True): tries.setdefault(rurl, 0) resource = load_url(rurl, timeout=timeout, enable_cache=enable_cache) + if (not resource.result): + raise MetadataException("error fetching '%s'" % rurl) xml = resource.result.strip() retry_resources = [] info = { diff --git a/src/pyff/utils.py b/src/pyff/utils.py index 8e766016..284dfdd0 100644 --- a/src/pyff/utils.py +++ b/src/pyff/utils.py @@ -273,7 +273,13 @@ def load_url(url, enable_cache=True, timeout=60): if url.startswith('file://'): path = url[7:] if not os.path.exists(path): - raise IOError("file not found: %s" % path) + log.error("file not found: %s" % path) + return _Resource(result=None, + cached=False, + date=None, + resp=None, + time=None, + last_modified=None) with io.open(path, 'r+b') as fd: return _Resource(result=fd.read(), From 36e7addf882d75ba6ac5835c82ac9272e97ff8c7 Mon Sep 17 00:00:00 2001 From: Pieter van der Meulen Date: Mon, 12 Oct 2015 16:42:11 +0200 Subject: [PATCH 2/3] Add 'fail_on_error' and 'filter_invalid' options to the load command --- src/pyff/builtins.py | 47 +++++++++++++++++++++++++++++++++++++------- src/pyff/mdrepo.py | 36 ++++++++++++++++++++++----------- 2 files changed, 65 insertions(+), 18 deletions(-) diff --git a/src/pyff/builtins.py b/src/pyff/builtins.py index 5c3011a5..70fb414e 100644 --- a/src/pyff/builtins.py +++ b/src/pyff/builtins.py @@ -389,18 +389,46 @@ def load(req, *opts): """ General-purpose resource
fetcher. - :param opts: :param req: The request - :param opts: Options: [qsize <5>] [timeout <30>] [validate ] + :param opts: Options: See "Options" below :return: None -Supports both remote and local resources. Fetching remote resources is done in parallell using threads. +Supports both remote and local resources. Fetching remote resources is done in parallel using threads. + +Note: When downloading remote files over HTTPS the TLS server certificate is not validated. +Note: Default behaviour is to ignore metadata files or entities in MD files that cannot be loaded + +Options are put directly after "load". E.g: + +.. code-block:: yaml + + - load fail_on_error True filter_invalid False: + - http://example.com/some_remote_metadata.xml + - local_file.xml + - /opt/directory_containing_md_files/ + +**Options** +Defaults are marked with (*) +- max_workers <5> : Number of parallel threads to use for loading MD files +- timeout <120> : Socket timeout when downloading files +- validate : When true downloaded metadata files are validated (schema validation) +- fail_on_error : Control whether an error during download, parsing or (optional) validation of a MD file + aborts processing of the pipeline. When true a failure aborts and causes pyff + to exit with a non zero exit code. Otherwise errors are logged but ignored. +- filter_invalid : Controls validation behaviour. When true Entities that fail validation are filtered, + i.e. are not loaded. When false the entire metadata file is either loaded, or not. + fail_on_error controls whether failure to validate the entire MD file will abort + processing of the pipeline. 
""" opts = dict(zip(opts[::2], opts[1::2])) opts.setdefault('timeout', 120) opts.setdefault('max_workers', 5) opts.setdefault('validate', "True") + opts.setdefault('fail_on_error', "False") + opts.setdefault('filter_invalid', "True") opts['validate'] = bool(strtobool(opts['validate'])) + opts['fail_on_error'] = bool(strtobool(opts['fail_on_error'])) + opts['filter_invalid'] = bool(strtobool(opts['filter_invalid'])) remote = [] for x in req.args: @@ -438,15 +466,20 @@ def load(req, *opts): elif os.path.exists(url): if os.path.isdir(url): log.debug("directory %s verify %s as %s via %s" % (url, params['verify'], params['as'], params['via'])) - req.md.load_dir(url, url=params['as'], validate=opts['validate'], post=post) + req.md.load_dir(url, url=params['as'], validate=opts['validate'], post=post, fail_on_error=opts['fail_on_error'], filter_invalid=opts['filter_invalid']) elif os.path.isfile(url): log.debug("file %s verify %s as %s via %s" % (url, params['verify'], params['as'], params['via'])) remote.append(("file://%s" % url, params['verify'], params['as'], post)) else: - log.error("Unknown file type for load: '%s'" % url) + error="Unknown file type for load: '%s'" % url + if opts['fail_on_error']: + raise PipeException(error) + log.error(error) else: - log.error("Don't know how to load '%s' as %s verify %s via %s" % - (url, params['as'], params['verify'], params['via'])) + error="Don't know how to load '%s' as %s verify %s via %s (file does not exist?)" % (url, params['as'], params['verify'], params['via']) + if opts['fail_on_error']: + raise PipeException(error) + log.error(error) req.md.fetch_metadata(remote, **opts) diff --git a/src/pyff/mdrepo.py b/src/pyff/mdrepo.py index 3692ae9e..9e3e31fa 100644 --- a/src/pyff/mdrepo.py +++ b/src/pyff/mdrepo.py @@ -441,7 +441,7 @@ def expiration(self, t): return None - def fetch_metadata(self, resources, max_workers=5, timeout=120, max_tries=5, validate=False): + def fetch_metadata(self, resources, max_workers=5, timeout=120, 
max_tries=5, validate=False, fail_on_error=False, filter_invalid=True): """Fetch a series of metadata URLs and optionally verify signatures. :param resources: A list of triples (url,cert-or-fingerprint,id, post-callback) @@ -465,15 +465,20 @@ def fetch_metadata(self, resources, max_workers=5, timeout=120, max_tries=5, val max_workers=max_workers, timeout=timeout, max_tries=max_tries, - validate=validate) + validate=validate, + fail_on_error=fail_on_error, + filter_invalid=filter_invalid ) - def _fetch_metadata(self, resources, max_workers=5, timeout=120, max_tries=5, validate=False): + def _fetch_metadata(self, resources, max_workers=5, timeout=120, max_tries=5, validate=False, fail_on_error=False, filter_invalid=True): tries = dict() def _process_url(rurl, verifier, tid, post, enable_cache=True): tries.setdefault(rurl, 0) - resource = load_url(rurl, timeout=timeout, enable_cache=enable_cache) + try: + resource = load_url(rurl, timeout=timeout, enable_cache=enable_cache) + except Exception, ex: + raise MetadataException(ex, "Exception fetching '%s': %s" % (rurl, str(ex)) ) if (not resource.result): raise MetadataException("error fetching '%s'" % rurl) xml = resource.result.strip() @@ -509,6 +514,8 @@ def _process_url(rurl, verifier, tid, post, enable_cache=True): t, offset = self.parse_metadata(StringIO(xml), key=verifier, base_url=rurl, + fail_on_error=fail_on_error, + filter_invalid=filter_invalid, validate=validate, validation_errors=info['Validation Errors'], expiration=self.expiration, @@ -573,7 +580,11 @@ def _process_url(rurl, verifier, tid, post, enable_cache=True): for future in futures.as_completed(future_to_url): url = future_to_url[future] if future.exception() is not None: - log.error('fetching %r generated an exception: %s' % (url, future.exception())) + if fail_on_error: + log.error('fetching %r generated an exception' % url) + raise future.exception() + else: + log.error('fetching %r generated an exception: %s' % (url, future.exception())) else: 
next_resources.extend(future.result()) resources = next_resources @@ -656,8 +667,8 @@ def parse_metadata(self, try: validate_document(t) except DocumentInvalid, ex: - raise MetadataException("schema validation failed: '%s': %s" % - (base_url, xml_error(ex.error_log, m=base_url))) + raise MetadataException("schema validation failed: [%s] '%s': %s" % + (base_url, source, xml_error(ex.error_log, m=base_url))) if t is not None: if t.tag == "{%s}EntityDescriptor" % NS['md']: @@ -667,10 +678,10 @@ def parse_metadata(self, t = post(t) except Exception, ex: - traceback.print_exc(ex) - log.error(ex) if fail_on_error: raise ex + traceback.print_exc(ex) + log.error(ex) return None, None if log.isDebugEnabled(): @@ -678,7 +689,7 @@ def parse_metadata(self, return t, valid_until - def load_dir(self, directory, ext=".xml", url=None, validate=False, post=None, description=None): + def load_dir(self, directory, ext=".xml", url=None, validate=False, post=None, description=None, fail_on_error=True, filter_invalid=True): """ :param directory: A directory to walk. 
:param ext: Include files with this extension (default .xml) @@ -706,7 +717,8 @@ def load_dir(self, directory, ext=".xml", url=None, validate=False, post=None, d validation_errors = dict() t, valid_until = self.parse_metadata(fn, base_url=url, - fail_on_error=True, + fail_on_error=fail_on_error, + filter_invalid=filter_invalid, validate=validate, validation_errors=validation_errors, post=post) @@ -714,6 +726,8 @@ def load_dir(self, directory, ext=".xml", url=None, validate=False, post=None, d for (eid, error) in validation_errors.iteritems(): log.error(error) except Exception, ex: + if fail_on_error: + raise MetadataException('Error parsing "%s": %s' % (fn, str(ex))) log.error(ex) if entities: From cc823effe24776c9142cad37a7aef1e01c8cc0da Mon Sep 17 00:00:00 2001 From: Pieter van der Meulen Date: Mon, 12 Oct 2015 17:24:23 +0200 Subject: [PATCH 3/3] Add 'LoadErrorTest' to test the 'fail_on_error' and 'filter_invalid' options --- .../test/data/metadata/test03-invalid.xml | 353 ++++++++++++++++++ src/pyff/test/test_pipeline.py | 237 +++++++++++- 2 files changed, 589 insertions(+), 1 deletion(-) create mode 100644 src/pyff/test/data/metadata/test03-invalid.xml diff --git a/src/pyff/test/data/metadata/test03-invalid.xml b/src/pyff/test/data/metadata/test03-invalid.xml new file mode 100644 index 00000000..0de6ed92 --- /dev/null +++ b/src/pyff/test/data/metadata/test03-invalid.xml @@ -0,0 +1,353 @@ + + + + + + + + renater.fr + + + + MIIDHzCCAgegAwIBAgIUMAs4iqt8Q3Y62N0yGJvfaAf63aowDQYJKoZIhvcNAQEF + BQAwGDEWMBQGA1UEAxMNYWEucmVuYXRlci5mcjAeFw0xMzEwMzAwOTU0MDVaFw0z + MzEwMzAwOTU0MDVaMBgxFjAUBgNVBAMTDWFhLnJlbmF0ZXIuZnIwggEiMA0GCSqG + SIb3DQEBAQUAA4IBDwAwggEKAoIBAQCfrWzXms4zzJ7AlwCwqXMTk5jqlZczs/J2 + /sbmoNiKGWTPau6gXRczcfjRbCksOoSQpm3nKs2w2vvaPWFmz7BasY3o2JUqfzwc + lrczPo5rjytFjyilaY4Sa40UoKpRNyRPzgTypNKIPCiWyy69Do250oW3S8ZgNOKF + 1JwB+2wpPqYndIs6RPD7EqCu1KE71JyRqrtFtfrVhTSujPI1V6DX+abekb3pc00S + f0l36TJkQ6NwvsBXanM5/loHz9Onq+K7Tt0Ri1qPew/oh0I4HXz2Mp9CLAhOt5mf + 
j1tQLo/wWppupSvgKF6cAUo/Xn/quACt2SDoPNdgv/9TT7/G1vu3AgMBAAGjYTBf + MD4GA1UdEQQ3MDWCDWFhLnJlbmF0ZXIuZnKGJGh0dHBzOi8vYWEucmVuYXRlci5m + ci9pZHAvc2hpYmJvbGV0aDAdBgNVHQ4EFgQU7XooEdAeHI76c9U8XB/G4FwRaqUw + DQYJKoZIhvcNAQEFBQADggEBACzMPXRnXhLXqKeQIIWQsU9pL7uL60iJUV/Ii5+P + 0Aqs/oziyyfLO7iyXz7cvSattYJm3XB7YjcfcjaJ084Bwc74tPnH4KguVzEqmC/r + 3b8ilpFW9+OwZLKWQ2y7Ah+79RNPWredkugJFxhQZBoVk6HMXip+Wi16Cz/ZYNun + tjrXmoMLkYh+bbKjLpRPsx5BgxF8wx384iclFmLBhfVR3qhnB8uVVr185V57YE5s + JEKOK3YJzr7N8sQAOgM92tsnxcm8zhgx5z/BReCdCBlELBrF7ngamQLyUA4D9FzI + tglFHSkQxwqrKzmfJmZgbx0/OqJE8JAhrD+K/hyhu89/TM4= + + + + + urn:oasis:names:tc:SAML:1.1:nameid-format:emailAddress + + + + + + + + + https://services.renater.fr/federation/en/metadata_registration_practice_statement + + + + https://federation.renater.fr/category/web + https://federation.renater.fr/scope/national + + + + + + + A-DSI - site institutionnel + https://www.a-dsi.fr + A-DSI - site institutionnel + Site web de + l'association + + + + + + + MIIDATCCAemgAwIBAgIJAKi2EWQfMqF7MA0GCSqGSIb3DQEBBQUAMBcxFTATBgNV + BAMTDHd3dy5hLWRzaS5mcjAeFw0xNDEwMjEwOTA1NDZaFw0xNzEwMjAwOTA1NDZa + MBcxFTATBgNVBAMTDHd3dy5hLWRzaS5mcjCCASIwDQYJKoZIhvcNAQEBBQADggEP + ADCCAQoCggEBAPftB/HSNchQWFHjFKbPZ05tkOabR/tIbwP1n4rjA9jQ1GJhA5kA + 6sGFeR2PfGKCPdodjdV7NVlmK0ZsfYgEjHbt8A5AWZjZeIltiMce9QraXkaKIAo8 + QAV5laD7vlGIWZmjO4U2c65dfEUj8wu3BmfNv7Y0YhBe8Jf+WkclaE5jzC8j8ljK + ZuHdGFC41XVzP8nhdZ2QjzbVWjHqrswSHy3GcDjq5/+7J5cRhaoFTF4gAS0qDdgf + 7BeBVnffNMMX62FGYyIPsZ5Bvac/1y1sQ0fWi5074OFNR/7t+XERw81AqGMgNhYD + gw2zZ6nX7EauWF/p6HABRWK7D3iaiyKzOhECAwEAAaNQME4wLQYDVR0RBCYwJIIM + d3d3LmEtZHNpLmZyhhRodHRwczovL3d3dy5hLWRzaS5mcjAdBgNVHQ4EFgQUON56 + BD54tzr97r9i/HhE6TZX+EwwDQYJKoZIhvcNAQEFBQADggEBAOKvai2kMi7uTyne + BxvFafXe3XeLuLG7WtCpP3dRjUGyimOS0VZqRemZYHCq+tfOGPXqd/nQE53DHWae + vODCRzDWcopbx3j/LJdmGs2QHDai0PhA/4MuvYxWqeA4v1jWXVabksTzlnCr6CC0 + 51O++G1waGajTWhGguX+/pVG7v+UhZ+0U9SHKkJojJzxaODt3oHTlPcEVsAuXvcV + 2qb4z8Lkj5yRdF2NSbJuUqQ4jHSumWdI+P2084p7fJDCE86ntUzjHdrYW7kkxuno + 
9d1w7yfssBdcpGIzy4Klqrdgntlu5QFLGUmqVQbbLu9m3aBmswKpbampw3l+TwWk + H5WCwvM= + + + + + + + + MIIDATCCAemgAwIBAgIJAKi2EWQfMqF7MA0GCSqGSIb3DQEBBQUAMBcxFTATBgNV + BAMTDHd3dy5hLWRzaS5mcjAeFw0xNDEwMjEwOTA1NDZaFw0xNzEwMjAwOTA1NDZa + MBcxFTATBgNVBAMTDHd3dy5hLWRzaS5mcjCCASIwDQYJKoZIhvcNAQEBBQADggEP + ADCCAQoCggEBAPftB/HSNchQWFHjFKbPZ05tkOabR/tIbwP1n4rjA9jQ1GJhA5kA + 6sGFeR2PfGKCPdodjdV7NVlmK0ZsfYgEjHbt8A5AWZjZeIltiMce9QraXkaKIAo8 + QAV5laD7vlGIWZmjO4U2c65dfEUj8wu3BmfNv7Y0YhBe8Jf+WkclaE5jzC8j8ljK + ZuHdGFC41XVzP8nhdZ2QjzbVWjHqrswSHy3GcDjq5/+7J5cRhaoFTF4gAS0qDdgf + 7BeBVnffNMMX62FGYyIPsZ5Bvac/1y1sQ0fWi5074OFNR/7t+XERw81AqGMgNhYD + gw2zZ6nX7EauWF/p6HABRWK7D3iaiyKzOhECAwEAAaNQME4wLQYDVR0RBCYwJIIM + d3d3LmEtZHNpLmZyhhRodHRwczovL3d3dy5hLWRzaS5mcjAdBgNVHQ4EFgQUON56 + BD54tzr97r9i/HhE6TZX+EwwDQYJKoZIhvcNAQEFBQADggEBAOKvai2kMi7uTyne + BxvFafXe3XeLuLG7WtCpP3dRjUGyimOS0VZqRemZYHCq+tfOGPXqd/nQE53DHWae + vODCRzDWcopbx3j/LJdmGs2QHDai0PhA/4MuvYxWqeA4v1jWXVabksTzlnCr6CC0 + 51O++G1waGajTWhGguX+/pVG7v+UhZ+0U9SHKkJojJzxaODt3oHTlPcEVsAuXvcV + 2qb4z8Lkj5yRdF2NSbJuUqQ4jHSumWdI+P2084p7fJDCE86ntUzjHdrYW7kkxuno + 9d1w7yfssBdcpGIzy4Klqrdgntlu5QFLGUmqVQbbLu9m3aBmswKpbampw3l+TwWk + H5WCwvM= + + + + + + + A-DSI - site institutionnel + Site web de l'association + + + + + + + + A-DSI + A-DSI + + + + + Yoann Mitaine + yoann.mitaine@u-grenoble3.fr + + + Gregory Mathes + gregory.mathes@upmf-grenoble.fr + + + + + + + + + https://services.renater.fr/federation/en/metadata_registration_practice_statement + + + + + abes.fr + + ABES - French Bibliographic Agency for Higher + Education +  + Agency staff + ABES - Agence Bibliographique de + l'Enseignement Supérieur + Personnels de l'Agence + + + + + + + MIIDJDCCAgygAwIBAgIVAMJyfTXyWtg2HoJZYvPzC5uTOkpLMA0GCSqGSIb3DQEB + BQUAMBkxFzAVBgNVBAMMDnNoYXJhdi5hYmVzLmZyMB4XDTE1MDMxNjA5MzYxM1oX + DTM1MDMxNjA5MzYxM1owGTEXMBUGA1UEAwwOc2hhcmF2LmFiZXMuZnIwggEiMA0G + CSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDTcLz3fcz0nnGZBl8dx+gsKgz2nazH + 
qLl58rvOUGQdcZqWutb0rmSlLv14aLJcSZiCse01Xa3VTdNboftLB9TzHgsRhy6E + eR5ISWBs4Hdw2bdbaV4KQb5zh8Mrjt/CO6yVUE2cr0bVPZKB2dGyblpdwQkdyakA + Dz2GuTIW2egZ466y0FaOTHzQ4jw3cGtOm0r1v7aMOLQg0tJQ3Ao8we36p+T/YrKN + qrt2kQbfbWMQz9rZsWLpI6YjCwImnTHOov+Sphq1QN/f3beZUM66hHBjTWytgZoB + tRhPzG02XGwY5FPYp/+xxU2uBUyJTdzON+J+/jK8CXlmLh6Et8qCH2mnAgMBAAGj + YzBhMB0GA1UdDgQWBBTxc6ftLuPBfgE21OFh7hYWV5sVODBABgNVHREEOTA3gg5z + aGFyYXYuYWJlcy5mcoYlaHR0cHM6Ly9zaGFyYXYuYWJlcy5mci9pZHAvc2hpYmJv + bGV0aDANBgkqhkiG9w0BAQUFAAOCAQEAbxeaU+peTP/+ZEadJxgffuRWU3L+qZnc + l36JFO9Rn1BFiABemGGUd1WU0AxNVJ0HDu3NZjKmQz3dYIjy5DR4nF9dDS25br3R + yUPa2NSIAatSB4BPbf8j+EpWrcuBBRXGGWXfvOTQq/DuKU9/FPGEvrIW5Wnl/uDd + kCI8+2KPpfuz002xNQID4wKyUJy9jyuE/2cn2JA1iYB0MMc3uqIls8T8Wn1leWr+ + TG5a9eJRJRp7O8KekUdkn17tzldLodiuduABA/gncseSrDLBUJntriF3yGGbCdJz + 8Dg24giw4jehoGYeEf1DW9ACdySlZDzf6YPGNUfVY9kVNDnLi9Th2Q== + + + + + + + + urn:mace:shibboleth:1.0:nameIdentifier + urn:oasis:names:tc:SAML:2.0:nameid-format:transient + + + + + + ABES - Agence Bibliographique de l'Enseignement Supérieur + ABES - Agence Bibliographique de l'Enseignement Supérieur + http://unknown.site.com + + + p2i@abes.fr + + + + + + + + + https://services.renater.fr/federation/en/metadata_registration_practice_statement + + + + http://www.geant.net/uri/dataprotection-code-of-conduct/v1 + https://federation.renater.fr/category/elearning + https://federation.renater.fr/scope/community + + + + + + + ACM Digital Library + http://dl.acm.org + Full text of every article ever published by ACM + and bibliographic citations from major publishers in + computing. + ACM Digital Library + Full text of every article ever published by ACM + and bibliographic citations from major publishers in + computing. 
+ http://www.acm.org/about/privacy-policy + + + + + + + MIIDBDCCAeygAwIBAgIJAMD4xoKFh5MoMA0GCSqGSIb3DQEBBQUAMBUxEzARBgNV + BAMTCmRsLmFjbS5vcmcwHhcNMTMwNzEwMTY0NjQ5WhcNMjMwNzA4MTY0NjQ5WjAV + MRMwEQYDVQQDEwpkbC5hY20ub3JnMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB + CgKCAQEA0D83ObCYMf9mChQ1RcwAIdUcWcrfRu/xEAamnUi6XtiP5zpq2XZsP6kr + 8kDKyf1cFb61AG76LyCNpPXUjOSImtCggfO3wwjrM6cVxoLxpjNHwyekf7Qp5Xht + wQ76ME2mBZyyeeQGQADSpuiQU8ceEEuFVzE4odvsYjslUf5FdyB7DV3wHsaem0GI + NPHMgm1aWw06Rb6TJT4+q7UVldkbFGToX/MwX6BVVn36TMllUCoPI9uC6elBtSDA + pM5IX3xPGm+HWhzWMwNH6DbiEaWunL0mT4qz4S/8chofFCYnfBIghcIR2byi/LVq + JsimhFLkh5pzeaN481jUXqm4CzVMsQIDAQABo1cwVTA0BgNVHREELTArggpkbC5h + Y20ub3Jnhh1odHRwczovL2RsLmFjbS5vcmcvc2hpYmJvbGV0aDAdBgNVHQ4EFgQU + Ff2/iHzhnXS3ru+f8m3ffW6LB2YwDQYJKoZIhvcNAQEFBQADggEBAL3G7h2JsWau + Ug3rrbbyGlfnyj7m9c02h7liJw0ZWMSZAQADzPcaTXxjJd72e1bfVlsqoyix1yjt + DOOd0PzgT4OmUrAu7CLnKIA2488s1TwrJgCkSKKfKYNAiAkTxbzlsJXb13Tok+vK + C/sxD92ba9DO8uc/uqO8m874uHyE1o4aAn1L5AsWuDI7hdv1hMfktODq7rV1FTCT + WUzSvfdP6GjtLivq3zW8+NluIu9EMSIYx/OUl13pXUjDiHl5oWO7MJoc5c6lu6Tu + TVBr4ywIEShumuXNtgDlBgb41BHr7tqsLwbuAMbWH1ZWYFSz0AKVFkVxsmIjgWn1 + bYwfJV8VaIg= + + + + + + + + MIIDBDCCAeygAwIBAgIJAMD4xoKFh5MoMA0GCSqGSIb3DQEBBQUAMBUxEzARBgNV + BAMTCmRsLmFjbS5vcmcwHhcNMTMwNzEwMTY0NjQ5WhcNMjMwNzA4MTY0NjQ5WjAV + MRMwEQYDVQQDEwpkbC5hY20ub3JnMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB + CgKCAQEA0D83ObCYMf9mChQ1RcwAIdUcWcrfRu/xEAamnUi6XtiP5zpq2XZsP6kr + 8kDKyf1cFb61AG76LyCNpPXUjOSImtCggfO3wwjrM6cVxoLxpjNHwyekf7Qp5Xht + wQ76ME2mBZyyeeQGQADSpuiQU8ceEEuFVzE4odvsYjslUf5FdyB7DV3wHsaem0GI + NPHMgm1aWw06Rb6TJT4+q7UVldkbFGToX/MwX6BVVn36TMllUCoPI9uC6elBtSDA + pM5IX3xPGm+HWhzWMwNH6DbiEaWunL0mT4qz4S/8chofFCYnfBIghcIR2byi/LVq + JsimhFLkh5pzeaN481jUXqm4CzVMsQIDAQABo1cwVTA0BgNVHREELTArggpkbC5h + Y20ub3Jnhh1odHRwczovL2RsLmFjbS5vcmcvc2hpYmJvbGV0aDAdBgNVHQ4EFgQU + Ff2/iHzhnXS3ru+f8m3ffW6LB2YwDQYJKoZIhvcNAQEFBQADggEBAL3G7h2JsWau + Ug3rrbbyGlfnyj7m9c02h7liJw0ZWMSZAQADzPcaTXxjJd72e1bfVlsqoyix1yjt + 
DOOd0PzgT4OmUrAu7CLnKIA2488s1TwrJgCkSKKfKYNAiAkTxbzlsJXb13Tok+vK + C/sxD92ba9DO8uc/uqO8m874uHyE1o4aAn1L5AsWuDI7hdv1hMfktODq7rV1FTCT + WUzSvfdP6GjtLivq3zW8+NluIu9EMSIYx/OUl13pXUjDiHl5oWO7MJoc5c6lu6Tu + TVBr4ywIEShumuXNtgDlBgb41BHr7tqsLwbuAMbWH1ZWYFSz0AKVFkVxsmIjgWn1 + bYwfJV8VaIg= + + + + + + + ACM Digital Library + ACM Digital Library + Full text of every article ever published by ACM and +bibliographic citations from major publishers in computing. + Full text of every article ever published by ACM and +bibliographic citations from major publishers in computing. + + + + + + Association for Computing Machinery + Association for Computing Machinery + http://unknown.site.com + + + dl-info@acm.org + + + + + + diff --git a/src/pyff/test/test_pipeline.py b/src/pyff/test/test_pipeline.py index 7f1c5ca7..555f4024 100644 --- a/src/pyff/test/test_pipeline.py +++ b/src/pyff/test/test_pipeline.py @@ -6,7 +6,7 @@ from mako.lookup import TemplateLookup from nose.plugins.skip import Skip import yaml -from pyff.mdrepo import MDRepository +from pyff.mdrepo import MDRepository, MetadataException from pyff.pipes import plumbing, Plumbing, PipeException from pyff.test import ExitException from StringIO import StringIO @@ -80,6 +80,241 @@ def parse_test(self): assert("removing 'https://idp.example.com/saml2/idp/metadata.php1': schema validation failed" in str(l)) +# To run all LoadErrorTests: ./setup.py test -s pyff.test.test_pipeline.LoadErrorTest +# To run individual test: ./setup.py test -s pyff.test.test_pipeline.LoadErrorTest.test_fail_on_error_no_file +class LoadErrorTest(PipeLineTest): + + # A File that does not exist must throw an error with fail_on_error=True + def test_fail_on_error_no_file(self): + self.output = tempfile.NamedTemporaryFile('w').name + with patch.multiple("sys", exit=self.sys_exit, stdout=StreamCapturing(sys.stdout), stderr=StreamCapturing(sys.stderr)): + from testfixtures import LogCapture + with LogCapture() as l: + try: + res, md = self.exec_pipeline(""" + 
- load fail_on_error True: + - %s/metadata/test01.xml + - %s/file_that_does_not_exist.xml + - select + - stats + """ % (self.datadir, self.datadir) ) + except PipeException, ex: + print ex + assert ("Don't know how to load" in str(ex)) + assert ("file_that_does_not_exist.xml" in str(ex)) + return True + finally: + if os.path.isfile(self.output): + os.unlink(self.output) + print sys.stdout.captured + print sys.stderr.captured + + assert "Expected PipeException" == False + + # A File that does not exist must throw an error with fail_on_error=True + def test_fail_on_error_no_file_url(self): + self.output = tempfile.NamedTemporaryFile('w').name + with patch.multiple("sys", exit=self.sys_exit, stdout=StreamCapturing(sys.stdout), stderr=StreamCapturing(sys.stderr)): + from testfixtures import LogCapture + with LogCapture() as l: + try: + res, md = self.exec_pipeline(""" + - load fail_on_error True: + - %s/metadata/test01.xml + - file://%s/file_that_does_not_exist.xml + - select + - stats + """ % (self.datadir, self.datadir) ) + except MetadataException, ex: + print ex + assert ("error fetching" in str(ex)) + assert ("file_that_does_not_exist.xml" in str(ex)) + return True + finally: + if os.path.isfile(self.output): + os.unlink(self.output) + print sys.stdout.captured + print sys.stderr.captured + + assert "Expected PipeException" == False + + + # An URL that cannot be downloaded must throw an error with fail_on_error=True + # Note: Due to load_url retries it takes 20s to complete this test + def test_fail_on_error_no_url(self): + self.output = tempfile.NamedTemporaryFile('w').name + with patch.multiple("sys", exit=self.sys_exit, stdout=StreamCapturing(sys.stdout), stderr=StreamCapturing(sys.stderr)): + from testfixtures import LogCapture + with LogCapture() as l: + try: + res, md = self.exec_pipeline(""" + - load fail_on_error True: + - %s/metadata/test01.xml + - http://127.0.0.1/does_not_exists.xml + - select + - stats + """ % (self.datadir) ) + except 
MetadataException, ex: + print ex + assert ("Exception fetching" in str(ex)) + assert ("http://127.0.0.1/does_not_exists.xml" in str(ex)) + return True + finally: + if os.path.isfile(self.output): + os.unlink(self.output) + print sys.stdout.captured + print sys.stderr.captured + + assert "Expected PipeException" == False + + # A file with invalid XML must throw an exception with fail_on_error True: + def test_fail_on_error_invalid_file(self): + self.output = tempfile.NamedTemporaryFile('w').name + with patch.multiple("sys", exit=self.sys_exit, stdout=StreamCapturing(sys.stdout), stderr=StreamCapturing(sys.stderr)): + from testfixtures import LogCapture + with LogCapture() as l: + try: + res, md = self.exec_pipeline(""" + - load fail_on_error True: + - %s/metadata/test01.xml + - %s/metadata/test02-invalid.xml + - select + - stats + """ % (self.datadir, self.datadir) ) + except MetadataException, ex: + print ex + assert ("no valid metadata found" in str(ex)) + assert ("/metadata/test02-invalid.xml" in str(ex)) + return True + finally: + if os.path.isfile(self.output): + os.unlink(self.output) + print sys.stdout.captured + print sys.stderr.captured + + assert "Expected MetadataException" == False + + # A directory with a file with invalid metadata must throw an exception with fail_on_error True and filter_invalid False: + def test_fail_on_error_invalid_dir(self): + self.output = tempfile.NamedTemporaryFile('w').name + with patch.multiple("sys", exit=self.sys_exit, stdout=StreamCapturing(sys.stdout), stderr=StreamCapturing(sys.stderr)): + from testfixtures import LogCapture + with LogCapture() as l: + try: + res, md = self.exec_pipeline(""" + - load fail_on_error True filter_invalid False: + - %s/metadata/ + - select + - stats + """ % (self.datadir) ) + except MetadataException, ex: + print ex + return True + finally: + if os.path.isfile(self.output): + os.unlink(self.output) + print sys.stdout.captured + print sys.stderr.captured + + assert "Expected 
MetadataException" == False + + # A file with invalid XML must not throw an exception by default (fail_on_error False): + def test_no_fail_on_error_invalid_file(self): + self.output = tempfile.NamedTemporaryFile('w').name + with patch.multiple("sys", exit=self.sys_exit, stdout=StreamCapturing(sys.stdout), stderr=StreamCapturing(sys.stderr)): + from testfixtures import LogCapture + with LogCapture() as l: + res, md = self.exec_pipeline(""" + - load: + - %s/metadata/test01.xml + - %s/metadata/test02-invalid.xml + - select + - stats + """ % (self.datadir, self.datadir) ) + print sys.stdout.captured + print sys.stderr.captured + if os.path.isfile(self.output): + os.unlink(self.output) + + # Loading an xml file with an invalid entity must throw when filter_invalid False and fail_on_error True + def test_fail_on_error_invalid_entity(self): + self.output = tempfile.NamedTemporaryFile('w').name + with patch.multiple("sys", exit=self.sys_exit, stdout=StreamCapturing(sys.stdout), stderr=StreamCapturing(sys.stderr)): + from testfixtures import LogCapture + with LogCapture() as l: + try: + res, md = self.exec_pipeline(""" + - load fail_on_error True filter_invalid False: + - %s/metadata/test01.xml + - %s/metadata/test03-invalid.xml + - select + - stats + """ % (self.datadir, self.datadir) ) + except MetadataException, ex: + print ex + assert ("schema validation failed" in str(ex)) + assert ("/metadata/test03-invalid.xml" in str(ex)) + return True + finally: + if os.path.isfile(self.output): + os.unlink(self.output) + print sys.stdout.captured + print sys.stderr.captured + + # Test default behaviour. 
Loading a file with an invalid entity must not raise an exception + def test_no_fail_on_error_invalid_entity(self): + self.output = tempfile.NamedTemporaryFile('w').name + with patch.multiple("sys", exit=self.sys_exit, stdout=StreamCapturing(sys.stdout), stderr=StreamCapturing(sys.stderr)): + from testfixtures import LogCapture + with LogCapture() as l: + res, md = self.exec_pipeline(""" + - load: + - %s/metadata/test01.xml + - %s/metadata/test03-invalid.xml + - select + - stats + """ % (self.datadir, self.datadir) ) + print sys.stdout.captured + print sys.stderr.captured + if os.path.isfile(self.output): + os.unlink(self.output) + + # When an invalid entity is filtered (filter_invalid True) it must not cause an exception, even if fail_on_error True + def test_no_fail_on_error_filtered_entity(self): + self.output = tempfile.NamedTemporaryFile('w').name + with patch.multiple("sys", exit=self.sys_exit, stdout=StreamCapturing(sys.stdout), stderr=StreamCapturing(sys.stderr)): + from testfixtures import LogCapture + with LogCapture() as l: + res, md = self.exec_pipeline(""" + - load fail_on_error True filter_invalid True: + - %s/metadata/test01.xml + - %s/metadata/test03-invalid.xml + - select + - stats + """ % (self.datadir, self.datadir) ) + print sys.stdout.captured + print sys.stderr.captured + if os.path.isfile(self.output): + os.unlink(self.output) + + # A directory with a file with invalid metadata must not throw by default: + def test_no_fail_on_error_invalid_dir(self): + self.output = tempfile.NamedTemporaryFile('w').name + with patch.multiple("sys", exit=self.sys_exit, stdout=StreamCapturing(sys.stdout), stderr=StreamCapturing(sys.stderr)): + from testfixtures import LogCapture + with LogCapture() as l: + + res, md = self.exec_pipeline(""" + - load: + - %s/metadata/ + - select + - stats + """ % (self.datadir) ) + if os.path.isfile(self.output): + os.unlink(self.output) + print sys.stdout.captured + print sys.stderr.captured + # noinspection 
PyUnresolvedReferences class SigningTest(PipeLineTest): def test_signing(self):