diff --git a/.gitignore b/.gitignore
index 23174ea..3fcf35b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ notes*.md
 stash*.*
 setup.sh
 .pypirc
+data/

 # Distribution / packaging
 build/
diff --git a/.travis.yml b/.travis.yml
index 4502701..40a4680 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,8 +20,6 @@ matrix:
     env: TOXENV=coverage
   - python: 3.6
     env: TOXENV=coveralls
-  - python: 3.6
-    env: TOXENV=flake8
   - python: 3.6
     env: TOXENV=packaging
   - python: 3.6
diff --git a/HISTORY.md b/HISTORY.md
index 01e1e07..f6577fb 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,6 +1,16 @@
 .. :changelog:

-0.1.0 - Marietta Blau (2019-05-20)
+0.2.0 - (2019-06-17) - Ida Pfeiffer
 ------------------------------------

-- First release on PyPI.
+[Release](https://github.com/AUSSDA/pyDataverse/releases/tag/v0.2.0)
+
+0.1.1 - (2019-05-28)
+------------------------------------
+
+[Release](https://github.com/AUSSDA/pyDataverse/releases/tag/v0.1.1)
+
+0.1.0 - (2019-05-20) - Marietta Blau
+------------------------------------
+
+[Release](https://github.com/AUSSDA/pyDataverse/releases/tag/v0.1.0)
diff --git a/README.md b/README.md
index 69d9159..3c917f1 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,22 @@
-[![Build Status](https://travis-ci.com/AUSSDA/pyDataverse.svg?branch=master)](https://travis-ci.com/AUSSDA/pyDataverse) [![Coverage Status](https://coveralls.io/repos/github/AUSSDA/pyDataverse/badge.svg)](https://coveralls.io/github/AUSSDA/pyDataverse) [![Documentation Status](https://readthedocs.org/projects/pydataverse/badge/?version=latest)](https://pydataverse.readthedocs.io/en/latest) [![GitHub](https://img.shields.io/github/license/aussda/pydataverse.svg)](https://opensource.org/licenses/MIT)
+[![PyPI](https://img.shields.io/pypi/v/pyDataverse.svg)](https://pypi.org/project/pyDataverse/) [![Build Status](https://travis-ci.com/AUSSDA/pyDataverse.svg?branch=master)](https://travis-ci.com/AUSSDA/pyDataverse) [![Coverage Status](https://coveralls.io/repos/github/AUSSDA/pyDataverse/badge.svg)](https://coveralls.io/github/AUSSDA/pyDataverse) [![Documentation Status](https://readthedocs.org/projects/pydataverse/badge/?version=latest)](https://pydataverse.readthedocs.io/en/latest) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pydataverse.svg) [![GitHub](https://img.shields.io/github/license/aussda/pydataverse.svg)](https://opensource.org/licenses/MIT)

 # pyDataverse

-pyDataverse is a Python module for [Dataverse](http://dataverse.org/). It uses the [Native API](http://guides.dataverse.org/en/latest/api/native-api.html) and [Data Access API](http://guides.dataverse.org/en/latest/api/dataaccess.html). It allows to create, update and remove Dataverses, Datasets and Datafiles via Dataverse's native API. Thanks to the developers of [dataverse-client-python](https://github.com/IQSS/dataverse-client-python), from which the project got inspired from.
+pyDataverse is a Python module for [Dataverse](http://dataverse.org).
+It uses the [Dataverse API](http://guides.dataverse.org/en/latest/api/index.html)
+and its metadata model to import, manipulate and export Dataverses, Datasets
+and Datafiles.

 **Features**

-* Open Source ([MIT](https://opensource.org/licenses/MIT))
-* `api.py`: Dataverse Api functionalities to create, get, publish and delete Dataverses, Datasets and Datafiles.
-* `utils.py`: Functions to support the core functionalities.
-* `exceptions.py`: Custom exceptions
-* `tests/*`: Tests on [Travis CI](https://travis-ci.com/AUSSDA/pyDataverse) ([pytest](https://docs.pytest.org/en/latest/) + [tox](http://tox.readthedocs.io/)).
-* [Documentation](https://pydataverse.readthedocs.io/en/latest/)
+* Dataverse Api functionalities to create, get, publish and delete Dataverses, Datasets and Datafiles of your Dataverse instance via Api.
+* Dataverse metadata model for easy manipulation and data conversion from and to other formats (e.g. Dataverse Api metadata JSON).
+* Utils to support core functionalities.
+* Custom exceptions.
+* Tests on [Travis CI](https://travis-ci.com/AUSSDA/pyDataverse) ([pytest](https://docs.pytest.org/en/latest/) + [tox](http://tox.readthedocs.io/)).
+* [Documentation](https://pydataverse.readthedocs.io/en/latest/) (Sphinx, ReadTheDocs)
 * Python 2 and 3 (>=2.7)
+* Open Source ([MIT](https://opensource.org/licenses/MIT))

 **Copyright**

diff --git a/setup.py b/setup.py
index 733d2f0..324ee16 100644
--- a/setup.py
+++ b/setup.py
@@ -53,7 +53,7 @@ def run_tests(self):
 INSTALL_REQUIREMENTS = [
     # A string or list of strings specifying what other distributions need to
     # be installed when this one is.
-    'requests'
+    'requests>=2.12.0'
 ]

 SETUP_REQUIREMENTS = [
diff --git a/src/pyDataverse/__init__.py b/src/pyDataverse/__init__.py
index a0084dc..9a9f0cc 100644
--- a/src/pyDataverse/__init__.py
+++ b/src/pyDataverse/__init__.py
@@ -16,7 +16,7 @@
 __email__ = 'stefan.kasberger@univie.ac.at'
 __copyright__ = 'Copyright (c) 2019 Stefan Kasberger'
 __license__ = 'MIT License'
-__version__ = '0.1.1'
+__version__ = '0.2.0'
 __url__ = 'https://github.com/AUSSDA/pyDataverse'
 __download_url__ = 'https://pypi.python.org/pypi/pyDataverse'
-__description__ = 'A Python wrapper around the Dataverse API'
+__description__ = 'A Python module for Dataverse.'
diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py
index 5988dd7..2bebc05 100644
--- a/src/pyDataverse/api.py
+++ b/src/pyDataverse/api.py
@@ -1,6 +1,6 @@
 # !/usr/bin/env python
 # -*- coding: utf-8 -*-
-"""Find out more at https://github.com/AUSSDA/pyDataverse."""
+"""Dataverse API connector."""
 from datetime import datetime
 import json
 from pyDataverse.exceptions import ApiAuthorizationError
@@ -14,14 +14,10 @@
 from requests import delete
 from requests import get
 from requests import post
+from requests import put
 import subprocess as sp

-"""
-Connect and request the Dataverse API Endpoints. Save and use request results.
-"""
-
-
 class Api(object):
     """API class.
@@ -45,22 +41,32 @@ class Api(object):
     api_token
     api_version

-    Example
-    ----------
-    Create an Api connection::
-
-        >>> base_url = 'http://demo.dataverse.org'
-        >>> api = Api(base_url)
-        >>> api.status
-        'OK'
-
     """

     def __init__(self, base_url, api_token=None, api_version='v1'):
-        """Init an `Api()` class.
+        """Init an Api() class.

         Scheme, host and path combined create the base-url for the api.
-        See more about url at https://en.wikipedia.org/wiki/URL
+        See more about URL at `Wikipedia <https://en.wikipedia.org/wiki/URL>`_.
+
+        Parameters
+        ----------
+        base_url : string
+            Base url for Dataverse api.
+        api_token : string
+            Api token for Dataverse api.
+        api_version : string
+            Api version of Dataverse native api. Default is `v1`.
+
+        Examples
+        -------
+        Create an Api connection::
+
+            >>> from pyDataverse.api import Api
+            >>> base_url = 'http://demo.dataverse.org'
+            >>> api = Api(base_url)
+            >>> api.status
+            'OK'

         """
         # Check and set basic variables.
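A minimal sketch of how the constructor documented above is meant to be used; the token value is a placeholder and is only needed for authenticated calls:

```python
from pyDataverse.api import Api

base_url = 'http://demo.dataverse.org'
api = Api(base_url, api_token='xxxx-xxxx')  # placeholder token
print(api.status)       # 'OK' if the instance is reachable
print(api.api_version)  # 'v1' by default
```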
@@ -117,7 +123,7 @@ def __str__(self):
         """
         return 'pyDataverse API class'

-    def make_get_request(self, query_str, params=None, auth=False):
+    def get_request(self, query_str, params=None, auth=False):
         """Make a GET request.

         Parameters
@@ -146,7 +152,7 @@ def make_get_request(self, query_str, params=None, auth=False):
             else:
                 ApiAuthorizationError(
                     'ERROR: GET - Api token not passed to '
-                    '`make_get_request` {}.'.format(url)
+                    '`get_request` {}.'.format(url)
                 )

         try:
@@ -174,8 +180,8 @@ def make_get_request(self, query_str, params=None, auth=False):
                 ''.format(url)
             )

-    def make_post_request(self, query_str, metadata=None, auth=False,
-                          params=None):
+    def post_request(self, query_str, metadata=None, auth=False,
+                     params=None):
         """Make a POST request.

         Parameters
@@ -206,7 +212,7 @@ def make_post_request(self, query_str, metadata=None, auth=False,
             else:
                 ApiAuthorizationError(
                     'ERROR: POST - Api token not passed to '
-                    '`make_post_request` {}.'.format(url)
+                    '`post_request` {}.'.format(url)
                 )

         try:
@@ -228,7 +234,61 @@ def make_post_request(self, query_str, metadata=None, auth=False,
                 ''.format(url)
             )

-    def make_delete_request(self, query_str, auth=False, params=None):
+    def put_request(self, query_str, metadata=None, auth=False,
+                    params=None):
+        """Make a PUT request.
+
+        Parameters
+        ----------
+        query_str : string
+            Query string for the request. Will be concatenated to
+            `native_api_base_url`.
+        metadata : string
+            Metadata as a json-formatted string. Defaults to `None`.
+        auth : bool
+            Should an api token be sent in the request. Defaults to `False`.
+        params : dict
+            Dictionary of parameters to be passed with the request.
+            Defaults to `None`.
+
+        Returns
+        -------
+        requests.Response
+            Response object of requests library.
+
+        """
+        url = '{0}{1}'.format(self.native_api_base_url, query_str)
+        if auth:
+            if self.api_token:
+                if not params:
+                    params = {}
+                params['key'] = self.api_token
+            else:
+                raise ApiAuthorizationError(
+                    'ERROR: PUT - Api token not passed to '
+                    '`put_request` {}.'.format(url)
+                )
+
+        try:
+            resp = put(
+                url,
+                data=metadata,
+                params=params
+            )
+            if resp.status_code == 401:
+                error_msg = resp.json()['message']
+                raise ApiAuthorizationError(
+                    'ERROR: PUT HTTP 401 - Authorization error {0}. MSG: {1}'
+                    ''.format(url, error_msg)
+                )
+            return resp
+        except ConnectionError:
+            raise ConnectionError(
+                'ERROR: PUT - Could not establish connection to api {}.'
+                ''.format(url)
+            )
+
+    def delete_request(self, query_str, auth=False, params=None):
         """Make a DELETE request.

         Parameters
@@ -257,7 +317,7 @@ def make_delete_request(self, query_str, auth=False, params=None):
             else:
                 ApiAuthorizationError(
                     'ERROR: DELETE - Api token not passed to '
-                    '`make_delete_request` {}.'.format(url)
+                    '`delete_request` {}.'.format(url)
                 )

         try:
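The renamed `get_request`, `post_request`, `put_request` and `delete_request` helpers all share one pattern: append `query_str` to the native API base url and, when `auth=True`, send the token as the `key` parameter. A sketch of calling one directly (the instance url is illustrative):

```python
from pyDataverse.api import Api

api = Api('http://demo.dataverse.org')
# query_str is concatenated to native_api_base_url, e.g. .../api/v1/info/version
resp = api.get_request('/info/version')
print(resp.status_code, resp.json())
```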
@@ -275,17 +335,17 @@ def make_delete_request(self, query_str, auth=False, params=None):
     def get_dataverse(self, identifier, auth=False):
         """Get dataverse metadata by alias or id.

-        View data about the dataverse $identified by identifier. Identifier can
-        be the id number of the dataverse, its alias, or the special
-        value :root.
+        View metadata about a dataverse.
+
+        .. code-block:: bash

-        GET http://$SERVER/api/dataverses/$id
+            GET http://$SERVER/api/dataverses/$id

         Parameters
         ----------
         identifier : string
-            Can either be a dataverse id (long) or a dataverse alias (more
-            robust).
+            Can either be a dataverse id (long), a dataverse alias (more
+            robust), or the special value ``:root``.

         Returns
         -------
         requests.Response
             Response object of requests library.

         """
         query_str = '/dataverses/{0}'.format(identifier)
-        resp = self.make_get_request(query_str, auth=auth)
+        resp = self.get_request(query_str, auth=auth)
         return resp

     def create_dataverse(self, identifier, metadata, auth=True,
@@ -302,18 +362,20 @@ def create_dataverse(self, identifier, metadata, auth=True,
         """Create a dataverse.

         Generates a new dataverse under identifier. Expects a JSON content
-        describing the dataverse, as in the example below. If identifier is
-        omitted, a root dataverse is created. $id can either be a dataverse id
-        (long) or a dataverse alias (more robust).
+        describing the dataverse.
+
+        HTTP Request:

-        POST http://$SERVER/api/dataverses/$id?key=$apiKey
+        .. code-block:: bash

-        Download the JSON example file and modified to create dataverses to
-        suit your needs. The fields name, alias, and dataverseContacts are
-        required. http://guides.dataverse.org/en/latest/
-        _downloads/dataverse-complete.json
+            POST http://$SERVER/api/dataverses/$id

-        resp.status_code:
+        Download the `dataverse.json
+        <http://guides.dataverse.org/en/latest/_downloads/dataverse-complete.json>`_
+        example file and modify it to create dataverses to suit your needs.
+        The fields name, alias, and dataverseContacts are required.
+
+        Status Codes:
             200: dataverse created
             201: dataverse created

@@ -321,14 +383,14 @@
         Parameters
         ----------
         identifier : string
             Can either be a dataverse id (long) or a dataverse alias (more
-            robust).
+            robust). If identifier is omitted, a root dataverse is created.
         metadata : string
             Metadata of the Dataverse as a json-formatted string.
         auth : bool
-            True if api authorization is necessary. Defaults to `True`.
+            True if api authorization is necessary. Defaults to ``True``.
         parent : string
             Parent dataverse, if existing, to which the Dataverse gets attached
-            to. Defaults to `:root`.
+            to. Defaults to ``:root``.

         Returns
         -------
         requests.Response
             Response object of requests library.

         """
         query_str = '/dataverses/{0}'.format(parent)
-        resp = self.make_post_request(query_str, metadata, auth)
+        resp = self.post_request(query_str, metadata, auth)

         if resp.status_code == 404:
             error_msg = resp.json()['message']
@@ -366,10 +428,14 @@ def publish_dataverse(self, identifier, auth=True):
         Publish the Dataverse pointed by identifier, which can be either the
         dataverse alias or its numerical id.

-        POST http://$SERVER/api/dataverses/$identifier/actions/:publish
+        HTTP Request:

-        resp.status_code:
-            200: dataverse published
+        .. code-block:: bash
+
+            POST http://$SERVER/api/dataverses/$identifier/actions/:publish
+
+        Status Code:
+            200: Dataverse published

         Parameters
         ----------
         identifier : string
             Can either be a dataverse id (long) or a dataverse alias (more
             robust).
         auth : bool
-            True if api authorization is necessary. Defaults to `False`.
+            True if api authorization is necessary. Defaults to ``True``.

         Returns
         -------
         requests.Response
             Response object of requests library.

         """
         query_str = '/dataverses/{0}/actions/:publish'.format(identifier)
-        resp = self.make_post_request(query_str, auth=auth)
+        resp = self.post_request(query_str, auth=auth)

         if resp.status_code == 401:
             error_msg = resp.json()['message']
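Taken together, the two methods above support a create-then-publish workflow. A sketch, assuming a demo instance and a placeholder token:

```python
import json
from pyDataverse.api import Api

api = Api('http://demo.dataverse.org', api_token='xxxx-xxxx')  # placeholder

# name, alias and dataverseContacts are the required fields.
metadata = json.dumps({
    'name': 'Test pyDataverse',
    'alias': 'test-pyDataverse',
    'dataverseContacts': [{'contactEmail': 'test@example.com'}]
})
resp = api.create_dataverse('test-pyDataverse', metadata, parent=':root')
resp = api.publish_dataverse('test-pyDataverse')
```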
@@ -414,11 +480,14 @@ def publish_dataverse(self, identifier, auth=True):
     def delete_dataverse(self, identifier, auth=True):
         """Delete dataverse by alias or id.

-        Deletes the dataverse whose ID is given:
-        DELETE http://$SERVER/api/dataverses/$id?key=$apiKey
+        HTTP Request:
+
+        .. code-block:: bash

-        resp.status_code:
-            200: dataverse deleted
+            DELETE http://$SERVER/api/dataverses/$id
+
+        Status Code:
+            200: Dataverse deleted

         Parameters
         ----------
@@ -433,7 +502,7 @@ def delete_dataverse(self, identifier, auth=True):
         """
         query_str = '/dataverses/{0}'.format(identifier)
-        resp = self.make_delete_request(query_str, auth)
+        resp = self.delete_request(query_str, auth)

         if resp.status_code == 401:
             error_msg = resp.json()['message']
@@ -463,23 +532,30 @@ def delete_dataverse(self, identifier, auth=True):
         print('Dataverse {} deleted.'.format(identifier))
         return resp

-    def get_dataset(self, identifier, auth=True, is_doi=True):
-        """Get metadata of dataset.
+    def get_dataset(self, identifier, auth=True, is_pid=True):
+        """Get metadata of a Dataset.

         With Dataverse identifier:
+
+        .. code-block:: bash
+
             GET http://$SERVER/api/datasets/$identifier
-        With PID:
-            GET http://$SERVER/api/datasets/:persistentId/?persistentId=$ID
+
+        With persistent identifier:
+
+        .. code-block:: bash
+
+            GET http://$SERVER/api/datasets/:persistentId/?persistentId=$id
             GET http://$SERVER/api/datasets/:persistentId/
-            ?persistentId=doi:10.5072/FK2/J8SJZB
+            ?persistentId=$pid

         Parameters
         ----------
         identifier : string
-            Doi of the dataset. e.g. `doi:10.11587/8H3N93`.
-        is_doi : bool
-            Is the identifier a Doi? Defaults to `True`. So far, the module only
-            supports Doi's as PID's.
+            Identifier of the dataset. Can be a Dataverse identifier or a
+            persistent identifier (e.g. ``doi:10.11587/8H3N93``).
+        is_pid : bool
+            True, if identifier is a persistent identifier.

         Returns
         -------
         requests.Response
             Response object of requests library.

         """
-        if is_doi:
+        if is_pid:
             query_str = '/datasets/:persistentId/?persistentId={0}'.format(
                 identifier)
         else:
             query_str = '/datasets/{0}'.format(identifier)
-        resp = self.make_get_request(query_str, auth=auth)
+        resp = self.get_request(query_str, auth=auth)
         return resp

-    def get_dataset_export(self, identifier, export_format):
+    def get_dataset_export(self, pid, export_format):
         """Get metadata of dataset exported in different formats.

-        CORS Export the metadata of the current published version of a dataset
-        in various formats:
+        Export the metadata of the current published version of a dataset
+        in various formats by its persistent identifier.
+
+        .. code-block:: bash

-        GET http://$SERVER/api/datasets/
-        export?exporter=ddi&persistentId=$persistentId
+            GET http://$SERVER/api/datasets/export?exporter=$exportformat&persistentId=$pid

         Parameters
         ----------
-        identifier : string
-            Doi of the dataset. e.g. `doi:10.11587/8H3N93`.
+        pid : string
+            Persistent identifier of the dataset (e.g. ``doi:10.11587/8H3N93``).
         export_format : string
-            Export format as a string. Formats: 'ddi', 'oai_ddi', 'dcterms',
-            'oai_dc', 'schema.org', 'dataverse_json'.
+            Export format as a string. Formats: ``ddi``, ``oai_ddi``,
+            ``dcterms``, ``oai_dc``, ``schema.org``, ``dataverse_json``.

         Returns
         -------
         requests.Response
             Response object of requests library.

@@ -519,50 +596,66 @@
         """
         query_str = '/datasets/export?exporter={0}&persistentId={1}'.format(
-            export_format, identifier)
-        resp = self.make_get_request(query_str)
+            export_format, pid)
+        resp = self.get_request(query_str)
         return resp
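A short sketch of reading a dataset and exporting its metadata; the PID is the example value used throughout the docstrings:

```python
from pyDataverse.api import Api

api = Api('http://demo.dataverse.org')
pid = 'doi:10.11587/8H3N93'  # example PID from the docstrings

resp = api.get_dataset(pid)                           # native API JSON
resp = api.get_dataset_export(pid, 'dataverse_json')  # or: ddi, oai_ddi,
                                                      # dcterms, oai_dc, schema.org
print(resp.status_code)
```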
     def create_dataset(self, dataverse, metadata, auth=True):
         """Add dataset to a dataverse.

-        http://guides.dataverse.org/en/latest/api/native-api.html#create-a-dataset-in-a-dataverse
+        `Dataverse Documentation
+        <http://guides.dataverse.org/en/latest/api/native-api.html#create-a-dataset-in-a-dataverse>`_
+
+        HTTP Request:
+
+        .. code-block:: bash
+
+            POST http://$SERVER/api/dataverses/$dataverse/datasets --upload-file FILENAME
+
+        Add new dataset with curl:

-        POST http://$SERVER/api/dataverses/$dataverse/datasets --upload-file
-        FILENAME
+        .. code-block:: bash

-        curl -H "X-Dataverse-key: $API_TOKEN" -X POST $SERVER_URL/api/
-        dataverses/$DV_ALIAS/datasets/:import?pid=$PERSISTENT_IDENTIFIER&
-        release=yes --upload-file dataset.json
-        curl -H "X-Dataverse-key: $API_TOKEN" -X POST $SERVER_URL/api/
-        dataverses/$DV_ALIAS/datasets --upload-file dataset-finch1.json
+            curl -H "X-Dataverse-key: $API_TOKEN" -X POST $SERVER_URL/api/dataverses/$DV_ALIAS/datasets --upload-file tests/data/dataset_min.json
+
+        Import dataset with existing persistent identifier with curl:
+
+        .. code-block:: bash
+
+            curl -H "X-Dataverse-key: $API_TOKEN" -X POST $SERVER_URL/api/dataverses/$DV_ALIAS/datasets/:import?pid=$PERSISTENT_IDENTIFIER&release=yes --upload-file tests/data/dataset_min.json

         To create a dataset, you must create a JSON file containing all the
-        metadata you want such as in this example file: dataset-finch1.json.
+        metadata you want, such as in the example file `dataset-finch1.json
+        <http://guides.dataverse.org/en/latest/_downloads/dataset-finch1.json>`_.
         Then, you must decide which dataverse to create the dataset in and
         target that dataverse with either the "alias" of the dataverse (e.g.
         "root") or the database id of the dataverse (e.g. "1"). The initial
         version state will be set to DRAFT.

-        http://guides.dataverse.org/en/latest/_downloads/dataset-finch1.json
-        resp.status_code:
+        Status Code:
             201: dataset created

         Parameters
         ----------
         dataverse : string
-            Alias of dataverse to which the dataset should be added to.
+            "alias" of the dataverse (e.g. ``root``) or the database id of the
+            dataverse (e.g. ``1``)
         metadata : string
-            Metadata of the Dataset as a json-formatted string.
+            Metadata of the Dataset as a json-formatted string (e.g.
+            `dataset-finch1.json
+            <http://guides.dataverse.org/en/latest/_downloads/dataset-finch1.json>`_)

         Returns
         -------
         requests.Response
             Response object of requests library.

+        Todo
+        -------
+        Link Dataset finch1.json
+
         """
         query_str = '/dataverses/{0}/datasets'.format(dataverse)
-        resp = self.make_post_request(query_str, metadata, auth)
+        resp = self.post_request(query_str, metadata, auth)

         if resp.status_code == 404:
             error_msg = resp.json()['message']
@@ -580,7 +673,7 @@ def create_dataset(self, dataverse, metadata, auth=True):
             print('Dataset {} created.'.format(identifier))
         return resp
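The Python equivalent of the curl calls above, as a sketch using the metadata file from the repository's test data and a placeholder token:

```python
from pyDataverse.api import Api

api = Api('http://demo.dataverse.org', api_token='xxxx-xxxx')  # placeholder

# Minimal dataset metadata; a full example is linked in the docstring above.
with open('tests/data/dataset_min.json') as f:
    metadata = f.read()
resp = api.create_dataset('root', metadata)
print(resp.status_code)  # 201 on success
```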
-    def publish_dataset(self, identifier, type='minor', auth=True):
+    def publish_dataset(self, pid, type='minor', auth=True):
         """Publish dataset.

         Publishes the dataset whose id is passed. If this is the first version
@@ -592,7 +685,11 @@ def publish_dataset(self, identifier, type='minor', auth=True):
         type=updatecurrent to update metadata without changing the version
         number.

-        POST http://$SERVER/api/datasets/$id/actions/:publish?type=$type
+        HTTP Request:
+
+        .. code-block:: bash
+
+            POST http://$SERVER/api/datasets/$id/actions/:publish?type=$type

         When there are no default workflows, a successful publication process
         will result in 200 OK response. When there are workflows, it is
         impossible to know how long they are going to take and whether they
         will succeed or not (recall that some stages might require human
         intervention). Thus, a 202 ACCEPTED is returned immediately. To know
         whether the publication process succeeded or not, the client code
         has to check the status of the dataset periodically, or perform some
         push request in the post-publish workflow.

-        resp.status_code:
+        Status Code:
             200: dataset published

         Parameters
         ----------
-        identifier : string
-            Doi of the dataset. e.g. `doi:10.11587/8H3N93`.
+        pid : string
+            Persistent identifier of the dataset (e.g.
+            ``doi:10.11587/8H3N93``).
         type : string
-            Passing `minor` increases the minor version number (2.3 is
-            updated to 2.4).
-            Passing `major` increases the major version number (2.3 is
-            updated to 3.0). Superusers can pass `updatecurrent` to update
-            metadata without changing the version number:
+            Passing ``minor`` increases the minor version number (2.3 is
+            updated to 2.4). Passing ``major`` increases the major version
+            number (2.3 is updated to 3.0). Superusers can pass
+            ``updatecurrent`` to update metadata without changing the version
+            number.
         auth : bool
-            True if api authorization is necessary. Defaults to `False`.
+            ``True`` if api authorization is necessary. Defaults to ``True``.

         Returns
         -------
         requests.Response
             Response object of requests library.

         """
         query_str = '/datasets/:persistentId/actions/:publish'
-        query_str += '?persistentId={0}&type={1}'.format(identifier, type)
-        resp = self.make_post_request(query_str, auth=auth)
+        query_str += '?persistentId={0}&type={1}'.format(pid, type)
+        resp = self.post_request(query_str, auth=auth)

         if resp.status_code == 404:
             error_msg = resp.json()['message']
@@ -643,19 +741,27 @@ def publish_dataset(self, identifier, type='minor', auth=True):
-            print('Dataset {} published'.format(identifier))
+            print('Dataset {} published'.format(pid))
         return resp

-    def delete_dataset(self, identifier, auth=True):
+    def delete_dataset(self, identifier, is_pid=True, auth=True):
         """Delete a dataset.

-        Delete the dataset whose id is passed:
-        DELETE http://$SERVER/api/datasets/$id?key=$apiKey
+        Delete the dataset whose id is passed.
+
+        HTTP Request:

-        resp.status_code:
+        .. code-block:: bash
+
+            DELETE http://$SERVER/api/datasets/$id
+
+        Status Code:
             200: dataset deleted

         Parameters
         ----------
         identifier : string
-            Doi of the dataset. e.g. `doi:10.11587/8H3N93`.
+            Identifier of the dataset. Can be a Dataverse identifier or a
+            persistent identifier (e.g. ``doi:10.11587/8H3N93``).
+        is_pid : bool
+            True, if identifier is a persistent identifier.

         Returns
         -------
         requests.Response
             Response object of requests library.

         """
-        query_str = '/datasets/:persistentId/?persistentId={0}'.format(
-            identifier)
-        resp = self.make_delete_request(query_str, auth=auth)
+        if is_pid:
+            query_str = '/datasets/:persistentId/?persistentId={0}'.format(
+                identifier)
+        else:
+            query_str = '/datasets/{0}'.format(identifier)
+        resp = self.delete_request(query_str, auth=auth)

         if resp.status_code == 404:
             error_msg = resp.json()['message']
@@ -690,17 +799,104 @@ def delete_dataset(self, identifier, auth=True):
             print('Dataset {} deleted'.format(identifier))
         return resp
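Publishing and deleting in combination, as a sketch (placeholder token, example PID from the docstrings):

```python
from pyDataverse.api import Api

api = Api('http://demo.dataverse.org', api_token='xxxx-xxxx')  # placeholder
pid = 'doi:10.11587/8H3N93'

api.publish_dataset(pid, type='major')  # bumps e.g. 2.3 -> 3.0
api.delete_dataset(pid)                 # removes the draft version
```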
-    def get_datafiles(self, doi, version='1'):
+    def edit_dataset_metadata(self, identifier, metadata, is_pid=True,
+                              is_replace=False, auth=True):
+        """Edit metadata of a given dataset.
+
+        `Official documentation
+        <http://guides.dataverse.org/en/latest/api/native-api.html#edit-dataset-metadata>`_.
+
+        HTTP Request:
+
+        .. code-block:: bash
+
+            PUT http://$SERVER/api/datasets/editMetadata/$id --upload-file FILENAME
+
+        Add data to dataset fields that are blank or accept multiple values
+        with the following curl request:
+
+        .. code-block:: bash
+
+            curl -H "X-Dataverse-key: $API_TOKEN" -X PUT $SERVER_URL/api/datasets/:persistentId/editMetadata/?persistentId=$pid --upload-file dataset-add-metadata.json
+
+        For these edits your JSON file need only include those dataset fields
+        which you would like to edit. A sample JSON file may be downloaded
+        here: `dataset-edit-metadata-sample.json
+        <http://guides.dataverse.org/en/latest/_downloads/dataset-edit-metadata-sample.json>`_
+
+        Parameters
+        ----------
+        identifier : string
+            Identifier of the dataset. Can be a Dataverse identifier or a
+            persistent identifier (e.g. ``doi:10.11587/8H3N93``).
+        metadata : string
+            Metadata of the Dataset as a json-formatted string.
+        is_pid : bool
+            ``True`` to use persistent identifier. ``False``, if not.
+        is_replace : bool
+            ``True`` to replace already existing metadata. ``False``, if not.
+        auth : bool
+            ``True``, if an api token should be sent. Defaults to ``True``.
+
+        Returns
+        -------
+        requests.Response
+            Response object of requests library.
+
+        Examples
+        -------
+        Get dataset metadata::
+
+            >>> data = api.get_dataset_metadata(doi, auth=True)
+            >>> resp = api.edit_dataset_metadata(doi, data, is_replace=True, auth=True)
+            >>> resp.status_code
+            200
+
+        """
+        if is_pid:
+            query_str = '/datasets/:persistentId/editMetadata/' \
+                        '?persistentId={0}'.format(identifier)
+        else:
+            query_str = '/datasets/editMetadata/{0}'.format(identifier)
+        params = {'replace': True} if is_replace else {}
+
+        resp = self.put_request(query_str, metadata, auth, params)
+
+        if resp.status_code == 401:
+            error_msg = resp.json()['message']
+            raise ApiAuthorizationError(
+                'ERROR: HTTP 401 - Updating metadata unauthorized. MSG: {0}'
+                ''.format(error_msg)
+            )
+        elif resp.status_code == 400:
+            if 'Error parsing' in resp.json()['message']:
+                print('Wrong passed data format.')
+            else:
+                print('You may not add data to a field that already has data ' +
+                      'and does not allow multiples. ' +
+                      'Use is_replace=true to replace existing data.')
+        elif resp.status_code == 200:
+            print('Dataset {0} updated'.format(identifier))
+        return resp
+
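A sketch of the edit workflow; the JSON shape follows the sample file referenced in the docstring (only the fields to change are included), with a placeholder token:

```python
import json
from pyDataverse.api import Api

api = Api('http://demo.dataverse.org', api_token='xxxx-xxxx')  # placeholder
pid = 'doi:10.11587/8H3N93'

metadata = json.dumps({
    'fields': [
        {'typeName': 'title', 'value': 'Updated Title'}
    ]
})
resp = api.edit_dataset_metadata(pid, metadata, is_replace=True)
```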
+    def get_datafiles(self, pid, version='1'):
         """List metadata of all datafiles of a dataset.

-        http://guides.dataverse.org/en/latest/api/native-api.html#list-files-in-a-dataset
-        GET http://$SERVER/api/datasets/$id/versions/$versionId/
-        files?key=$apiKey
+        `Documentation
+        <http://guides.dataverse.org/en/latest/api/native-api.html#list-files-in-a-dataset>`_
+
+        HTTP Request:
+
+        .. code-block:: bash
+
+            GET http://$SERVER/api/datasets/$id/versions/$versionId/files

         Parameters
         ----------
-        doi : string
-            Doi of the dataset. e.g. `doi:10.11587/8H3N93`.
+        pid : string
+            Persistent identifier of the dataset. e.g. ``doi:10.11587/8H3N93``.
         version : string
             Version of dataset. Defaults to `1`.

         Returns
         -------
         requests.Response
             Response object of requests library.

         """
         base_str = '/datasets/:persistentId/versions/'
-        query_str = base_str+'{0}/files?persistentId={1}'.format(version, doi)
-        resp = self.make_get_request(query_str)
+        query_str = base_str + '{0}/files?persistentId={1}'.format(
+            version, pid)
+        resp = self.get_request(query_str)
         return resp

-    def get_datafile(self, identifier):
+    def get_datafile(self, identifier, is_pid=True):
         """Download a datafile via the Dataverse Data Access API.

-        File ID
+        Get by file id (HTTP Request).
+
+        .. code-block:: bash
+
             GET /api/access/datafile/$id
-        DOI
-        GET http://$SERVER/api/access/datafile/
-        :persistentId/?persistentId=doi:10.5072/FK2/J8SJZB
+
+        Get by persistent identifier (HTTP Request).
+
+        .. code-block:: bash
+
+            GET http://$SERVER/api/access/datafile/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB

         Parameters
         ----------
         identifier : string
-            Doi of the dataset. e.g. `doi:10.11587/8H3N93`.
+            Identifier of the dataset. Can be datafile id or persistent
+            identifier of the datafile (e.g. doi).
+        is_pid : bool
+            ``True`` to use persistent identifier. ``False``, if not.

         Returns
         -------
         requests.Response
             Response object of requests library.

         """
-        query_str = '/access/datafile/{0}'.format(identifier)
-        resp = self.make_get_request(query_str)
+        if is_pid:
+            query_str = '/access/datafile/:persistentId/' \
+                        '?persistentId={0}'.format(identifier)
+        else:
+            query_str = '/access/datafile/{0}'.format(identifier)
+        resp = self.get_request(query_str)
         return resp

     def get_datafile_bundle(self, identifier):
-        """Download a datafile in all its formats via the Dataverse Data Access API.
+        """Download a datafile in all its formats.
+
+        HTTP Request:
+
+        .. code-block:: bash

-        GET /api/access/datafile/bundle/$id
+            GET /api/access/datafile/bundle/$id

         Data Access API calls can now be made using persistent identifiers (in
         addition to database ids). This is done by passing the constant
@@ -763,7 +977,7 @@
         Parameters
         ----------
         identifier : string
-            Doi of the dataset. e.g. `doi:10.11587/8H3N93`.
+            Identifier of the dataset.

         Returns
         -------
         requests.Response
             Response object of requests library.

         """
         query_str = '/access/datafile/bundle/{0}'.format(identifier)
-        data = self.make_get_request(query_str)
+        data = self.get_request(query_str)
         return data

-    def upload_file(self, identifier, filename):
+    def upload_file(self, identifier, filename, is_pid=True):
         """Add file to a dataset.

         Add a file to an existing Dataset. Description and tags are optional.
-        POST http://$SERVER/api/datasets/$id/add?key=$apiKey
+
+        HTTP Request:
+
+        .. code-block:: bash
+
+            POST http://$SERVER/api/datasets/$id/add

         The upload endpoint checks the content of the file, compares it with
         existing files and tells if already in the database (most likely via
-        hashing)
+        hashing).

         Parameters
         ----------
         identifier : string
-            Doi of the dataset. e.g. `doi:10.11587/8H3N93`.
+            Identifier of the dataset.
         filename : string
             Full filename with path.
+        is_pid : bool
+            ``True`` to use persistent identifier. ``False``, if not.

         Returns
         -------
         requests.Response
             Response object of requests library.

         """
         query_str = self.native_api_base_url
-        query_str += '/datasets/:persistentId/add?persistentId={0}'.format(
-            identifier)
+        if is_pid:
+            query_str += '/datasets/:persistentId/add?persistentId={0}'.format(
+                identifier)
+        else:
+            query_str += '/datasets/{0}/add'.format(identifier)
         shell_command = 'curl -H "X-Dataverse-key: {0}"'.format(
             self.api_token)
         shell_command += ' -X POST {0} -F file=@{1}'.format(
             query_str, filename)
-        # TODO: is shell=True necessary?
+        # TODO(Shell): is shell=True necessary?
         result = sp.run(shell_command, shell=True, stdout=sp.PIPE)
         resp = json.loads(result.stdout)
         return resp
@@ -814,10 +1038,14 @@
     def get_info_version(self):
         """Get the Dataverse version and build number.

-        The response contains the version and build numbers.
+        The response contains the version and build numbers. Requires no api
+        token.

-        Requires no api_token
-        GET http://$SERVER/api/info/version
+        HTTP Request:
+
+        .. code-block:: bash
+
+            GET http://$SERVER/api/info/version

         Returns
         -------
         requests.Response
             Response object of requests library.

@@ -826,16 +1054,20 @@
         """
         query_str = '/info/version'
-        resp = self.make_get_request(query_str)
+        resp = self.get_request(query_str)
         return resp
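The datafile methods above combine into a list-download-upload round trip; a sketch, assuming the file-listing layout of the native API response and a placeholder token and path:

```python
from pyDataverse.api import Api

api = Api('http://demo.dataverse.org', api_token='xxxx-xxxx')  # placeholder
pid = 'doi:10.11587/8H3N93'

files = api.get_datafiles(pid).json()['data']
file_id = files[0]['dataFile']['id']            # database id of the first file
resp = api.get_datafile(file_id, is_pid=False)  # download by database id
api.upload_file(pid, '/tmp/data.csv')           # path is illustrative
```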
     def get_info_server(self):
         """Get dataverse server name.

-        This is useful when a Dataverse system is
-        composed of multiple Java EE servers behind a load balancer.
+        This is useful when a Dataverse system is composed of multiple Java EE
+        servers behind a load balancer.
+
+        HTTP Request:

-        GET http://$SERVER/api/info/server
+        .. code-block:: bash
+
+            GET http://$SERVER/api/info/server

         Returns
         -------
         requests.Response
             Response object of requests library.

@@ -844,7 +1076,7 @@
         """
         query_str = '/info/server'
-        resp = self.make_get_request(query_str)
+        resp = self.get_request(query_str)
         return resp

     def get_info_apiTermsOfUse(self):
@@ -853,7 +1085,11 @@
         The response contains the text value inserted as API Terms of use
         which uses the database setting :ApiTermsOfUse.

-        GET http://$SERVER/api/info/apiTermsOfUse
+        HTTP Request:
+
+        .. code-block:: bash
+
+            GET http://$SERVER/api/info/apiTermsOfUse

         Returns
         -------
         requests.Response
             Response object of requests library.

@@ -862,7 +1098,7 @@
         """
         query_str = '/info/apiTermsOfUse'
-        resp = self.make_get_request(query_str)
+        resp = self.get_request(query_str)
         return resp

     def get_metadatablocks(self):
@@ -870,7 +1106,11 @@
         Lists brief info about all metadata blocks registered in the system.

-        GET http://$SERVER/api/metadatablocks
+        HTTP Request:
+
+        .. code-block:: bash
+
+            GET http://$SERVER/api/metadatablocks

         Returns
         -------
         requests.Response
             Response object of requests library.

@@ -879,7 +1119,7 @@
         """
         query_str = '/metadatablocks'
-        resp = self.make_get_request(query_str)
+        resp = self.get_request(query_str)
         return resp

     def get_metadatablock(self, identifier):
@@ -888,7 +1128,11 @@
         Returns data about the block whose identifier is passed. identifier
         can either be the block's id, or its name.

-        GET http://$SERVER/api/metadatablocks/$identifier
+        HTTP Request:
+
+        .. code-block:: bash
+
+            GET http://$SERVER/api/metadatablocks/$identifier

         Parameters
         ----------
@@ -902,5 +1146,5 @@
         """
         query_str = '/metadatablocks/{0}'.format(identifier)
-        resp = self.make_get_request(query_str)
+        resp = self.get_request(query_str)
         return resp
diff --git a/src/pyDataverse/docs/source/conf.py b/src/pyDataverse/docs/source/conf.py
index ae1e187..3ba9937 100644
--- a/src/pyDataverse/docs/source/conf.py
+++ b/src/pyDataverse/docs/source/conf.py
@@ -83,7 +83,6 @@
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = 'sphinx'

-
 # -- Options for HTML output -------------------------------------------------

 # The theme to use for HTML and HTML Help pages. See the documentation for
diff --git a/src/pyDataverse/docs/source/developer.rst b/src/pyDataverse/docs/source/developer.rst
index b3fe509..a7532c1 100644
--- a/src/pyDataverse/docs/source/developer.rst
+++ b/src/pyDataverse/docs/source/developer.rst
@@ -14,6 +14,13 @@ Api Interface
    :members:


+Models Interface
+-----------------------------
+
+.. automodule:: pyDataverse.models
+    :members:
+
+
 Utils Interface
 -----------------------------

@@ -35,6 +42,8 @@ Install from the local git repository, with all its dependencies:

 .. code-block:: shell

+    git clone git@github.com:AUSSDA/pyDataverse.git
+    cd pyDataverse
     virtualenv venv
     source venv/bin/activate
     pip install -r tools/tests-requirements.txt
@@ -86,19 +95,6 @@ When you only want to run one test, e.g. the py36 test:

 To find out more about which tests are available, have a look inside the tox.ini file.

-
-Documentation
------------------------------
-
-
-**Create Sphinx Docs**
-
-Use Sphinx to create class and function documentation out of the doc-strings. You can call it via `tox`. This creates the created docs inside `docs/build`.
-
-.. code-block:: shell
-
-    tox -e docs
-
 **Create Coverage Reports**

 Run tests with coverage to create html and xml reports as an output. Again, call it via `tox`. This creates the reports inside `docs/coverage_html/`.
@@ -114,3 +110,15 @@ To use Coveralls on local development:

 .. code-block:: shell

     tox -e coveralls
+
+Documentation
+-----------------------------
+
+
+**Create Sphinx Docs**
+
+Use Sphinx to create class and function documentation out of the doc-strings. You can call it via `tox`. This places the generated docs inside `docs/build`.
+
+.. code-block:: shell
+
+    tox -e docs
diff --git a/src/pyDataverse/docs/source/index.rst b/src/pyDataverse/docs/source/index.rst
index a0d25ce..7fbbf80 100644
--- a/src/pyDataverse/docs/source/index.rst
+++ b/src/pyDataverse/docs/source/index.rst
@@ -27,9 +27,9 @@ Release v\ |version|.
 -------------------

 pyDataverse is a Python module for `Dataverse <http://dataverse.org>`_.
-It uses the `Native API <http://guides.dataverse.org/en/latest/api/native-api.html>`_
-and `Data Access API <http://guides.dataverse.org/en/latest/api/dataaccess.html>`_
-to create, update and remove Dataverses, Datasets and Datafiles.
+It uses the `Dataverse API <http://guides.dataverse.org/en/latest/api/index.html>`_
+and its metadata model to import, manipulate and export Dataverses, Datasets
+and Datafiles.

 -------------------

@@ -46,22 +46,24 @@ Quickstart

 **Usage**

 >>> from pyDataverse.api import Api
+>>> from pyDataverse.models import Dataverse
 >>> # establish connection
->>> base_url = 'http://demo.dataverse.org'
+>>> base_url = 'https://data.aussda.at/'
 >>> api = Api(base_url)
 >>> api.status
 'OK'
 >>> # get dataverse
->>> dv = 'ecastro' # dataverse alias or id
+>>> dv = 'autnes' # dataverse alias or id
 >>> resp = api.get_dataverse(dv)
 >>> resp.json()['data']['creationDate']
-'2015-04-20T09:29:39Z'
+'2017-11-09T13:53:27Z'
 >>> # get dataset
+>>> identifier = 'doi:10.11587/IMKDZI'
 >>> resp = api.get_dataset(identifier)
 >>> resp.json()['data']['id']
-24
+345
 >>> # get datafile
->>> datafile_id = '32' # file id of the datafile
+>>> datafile_id = '399' # file id of the datafile
 >>> resp = api.get_datafile(datafile_id)
 >>> resp
 <Response [200]>

@@ -80,9 +82,10 @@ External packages:

 Features
 -----------------------------

-- Dataverse Api functionalities to create, get, publish and delete Dataverses, Datasets and Datafiles.
-- Utils to support the core functionalities.
-- Custom exceptions
+- Dataverse Api functionalities to create, get, publish and delete Dataverses, Datasets and Datafiles of your Dataverse instance via Api.
+- Dataverse metadata model for easy manipulation and data conversion from and to other formats (e.g. Dataverse Api metadata JSON).
+- Utils to support core functionalities.
+- Custom exceptions.
 - Tests on `Travis CI <https://travis-ci.com/AUSSDA/pyDataverse>`_ (`pytest <https://docs.pytest.org/en/latest/>`_ + `tox <http://tox.readthedocs.io/>`_).
 - Open Source (`MIT <https://opensource.org/licenses/MIT>`_)
diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py
new file mode 100644
index 0000000..f5fead2
--- /dev/null
+++ b/src/pyDataverse/models.py
@@ -0,0 +1,1442 @@
+# !/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Dataverse data-types data model."""
+from __future__ import absolute_import
+from pyDataverse.utils import dict_to_json
+from pyDataverse.utils import read_file_json
+from pyDataverse.utils import write_file_json
+
+
+"""
+Data-structure to work with data and metadata of Dataverses, Datasets and
+Datafiles - coming from different sources.
+""" + + +class Dataverse(object): + """Base class for Dataverse data model.""" + + """Attributes required for Dataverse metadata json.""" + __attr_required_metadata = [ + 'alias', + 'name', + 'dataverseContacts' + ] + """Attributes valid for Dataverse metadata json.""" + __attr_valid_metadata = [ + 'alias', + 'name', + 'affiliation', + 'description', + 'dataverseContacts', + 'dataverseType' + ] + """Attributes valid for Dataverse class.""" + __attr_valid_class = [ + # 'datasets', + # 'dataverses', + 'pid' + ] + __attr_valid_metadata + + def __init__(self): + """Init a Dataverse() class. + + Examples + ------- + Create a Dataverse:: + + >>> from pyDataverse.models import Dataverse + >>> dv = Dataverse() + + """ + """Misc""" + self.datasets = [] + self.dataverses = [] + self.pid = None + + """Metadata""" + self.name = None + self.alias = None + self.dataverseContacts = [] + self.affiliation = None + self.description = None + self.dataverseType = None + + def __str__(self): + """Return name of Dataverse() class for users.""" + return 'pyDataverse Dataverse() model class.' + + def set(self, data): + """Set class attributes with a flat dict. + + Parameters + ---------- + data : dict + Flat dict with data. Key's must be name the same as the class + attribute, the data should be mapped to. + + Examples + ------- + Set Dataverse attributes via flat dict:: + + >>> from pyDataverse.models import Dataverse + >>> dv = Dataverse() + >>> data = { + >>> 'dataverseContacts': [{'contactEmail': 'test@example.com'}], + >>> 'name': 'Test pyDataverse', + >>> 'alias': 'test-pyDataverse' + >>> } + >>> dv.set(data) + >>> dv.name + 'Test pyDataverse' + + """ + for key, val in data.items(): + if key in self.__attr_valid_class: + self.__setattr__(key, val) + else: + # TODO: Raise Exception + print('Key {0} not valid.'.format(key)) + + def import_metadata(self, filename, format='dv_up'): + """Import Dataverse metadata from file. + + This simply parses in data with valid attribute naming as keys. + Data must not be complete, and also attributes required for the + metadata json export can be missing. + + Parameters + ---------- + filename : string + Filename with full path. + format : string + Data format of input. Available formats are: `dv_up` for Dataverse + Api upload compatible format. + + Examples + ------- + Import metadata coming from json file:: + + >>> from pyDataverse.models import Dataverse + >>> dv = Dataverse() + >>> dv.import_metadata('tests/data/dataverse_min.json') + >>> dv.name + 'Test pyDataverse' + + """ + data = {} + if format == 'dv_up': + metadata = read_file_json(filename) + # get first level metadata and parse it automatically + for attr in self.__attr_valid_metadata: + if attr in metadata: + data[attr] = metadata[attr] + self.set(data) + elif format == 'dv_down': + metadata = read_file_json(filename) + self.set(data) + else: + # TODO: Exception + print('Data-format not right.') + + def is_valid(self): + """Check if set attributes are valid for Dataverse api metadata creation. + + The attributes required are listed in `__attr_required_metadata`. + + Returns + ------- + bool + True, if creation of metadata json is possible. False, if not. 
+    def is_valid(self):
+        """Check if set attributes are valid for Dataverse api metadata creation.
+
+        The attributes required are listed in `__attr_required_metadata`.
+
+        Returns
+        -------
+        bool
+            True, if creation of metadata json is possible. False, if not.
+
+        Examples
+        -------
+        Check if metadata is valid for Dataverse api upload::
+
+            >>> from pyDataverse.models import Dataverse
+            >>> dv = Dataverse()
+            >>> data = {
+            >>>     'dataverseContacts': [{'contactEmail': 'test@example.com'}],
+            >>>     'name': 'Test pyDataverse',
+            >>>     'alias': 'test-pyDataverse'
+            >>> }
+            >>> dv.set(data)
+            >>> dv.is_valid()
+            True
+            >>> dv.name = None
+            >>> dv.is_valid()
+            False
+
+        """
+        is_valid = True
+        for attr in self.__attr_required_metadata:
+            if not self.__getattribute__(attr):
+                is_valid = False
+                print('attribute \'{0}\' missing.'.format(attr))
+        return is_valid
+
+    def dict(self, format='dv_up'):
+        """Create dicts in different data formats.
+
+        `dv_up`: Checks if data is valid for the different dict formats.
+
+        Parameters
+        ----------
+        format : string
+            Data format for dict creation. Available formats are: `dv_up` with
+            all metadata for Dataverse api upload, and `all` with all attributes
+            set.
+
+        Returns
+        -------
+        dict
+            Data as dict.
+
+        Examples
+        -------
+        Get dict of Dataverse metadata::
+
+            >>> from pyDataverse.models import Dataverse
+            >>> dv = Dataverse()
+            >>> data = {
+            >>>     'dataverseContacts': [{'contactEmail': 'test@example.com'}],
+            >>>     'name': 'Test pyDataverse',
+            >>>     'alias': 'test-pyDataverse'
+            >>> }
+            >>> dv.set(data)
+            >>> data = dv.dict()
+            >>> data['name']
+            'Test pyDataverse'
+
+        Todo
+        -------
+        Validate standards.
+
+        """
+        data = {}
+        if format == 'dv_up':
+            if self.is_valid():
+                for attr in self.__attr_valid_metadata:
+                    if self.__getattribute__(attr) is not None:
+                        data[attr] = self.__getattribute__(attr)
+                # TODO: check whether required attributes are set; raise an
+                # exception if not
+                return data
+            else:
+                print('dict can not be created. Data is not valid for format.')
+                return None
+        elif format == 'all':
+            for attr in self.__attr_valid_class:
+                if self.__getattribute__(attr) is not None:
+                    data[attr] = self.__getattribute__(attr)
+            return data
+        else:
+            # TODO: Exception
+            print('Format not right for dict.')
+            return None
+
+    def json(self, format='dv_up'):
+        r"""Create json from attributes.
+
+        Parameters
+        ----------
+        format : string
+            Data format of input. Available formats are: `dv_up` for Dataverse
+            Api upload compatible format and `all` with all attributes named in
+            `__attr_valid_class`.
+
+        Returns
+        -------
+        string
+            json-formatted string of Dataverse metadata for api upload.
+
+        Examples
+        -------
+        Get json of Dataverse metadata::
+
+            >>> from pyDataverse.models import Dataverse
+            >>> dv = Dataverse()
+            >>> data = {
+            >>>     'dataverseContacts': [{'contactEmail': 'test@example.com'}],
+            >>>     'name': 'Test pyDataverse',
+            >>>     'alias': 'test-pyDataverse'
+            >>> }
+            >>> dv.set(data)
+            >>> data = dv.json()
+            >>> data
+            '{\n  "name": "Test pyDataverse",\n  "dataverseContacts": [\n    {\n      "contactEmail": "test@example.com"\n    }\n  ],\n  "alias": "test-pyDataverse"\n}'
+
+        Todo
+        -------
+        Validate standards.
+
+        """
+        if format == 'dv_up':
+            data = self.dict('dv_up')
+            if data:
+                return dict_to_json(data)
+            else:
+                return None
+        elif format == 'all':
+            data = self.dict('all')
+            if data:
+                return dict_to_json(data)
+            else:
+                return None
+        else:
+            # TODO Exception
+            print('data format not valid.')
+    def export_metadata(self, filename, format='dv_up'):
+        """Export Dataverse metadata to Dataverse api upload json.
+
+        Parameters
+        ----------
+        filename : string
+            Filename with full path.
+        format : string
+            Data format for export. Available format is: `dv_up` with all
+            metadata for Dataverse api upload.
+
+        Examples
+        -------
+        Export Dataverse metadata::
+
+            >>> from pyDataverse.models import Dataverse
+            >>> dv = Dataverse()
+            >>> data = {
+            >>>     'dataverseContacts': [{'contactEmail': 'test@example.com'}],
+            >>>     'name': 'Test pyDataverse',
+            >>>     'alias': 'test-pyDataverse'
+            >>> }
+            >>> dv.set(data)
+            >>> dv.export_metadata('tests/data/dataverse_export.json')
+
+        """
+        if format == 'dv_up':
+            return write_file_json(filename, self.dict())
+        else:
+            # TODO: Exception
+            print('Data-format not right.')
+
+
+class Dataset(object):
+    """Base class for the Dataset data model."""
+
+    """Attributes required for Dataset metadata json."""
+    __attr_required_metadata = [
+        'title',
+        'author',
+        'datasetContact',
+        'dsDescription',
+        'subject'
+    ]
+
+    """
+    Dataset metadata attributes of Dataverse api upload inside
+    [\'datasetVersion\'].
+    """
+    __attr_valid_metadata_datasetVersion = [
+        'license',
+        'termsOfUse',
+        'termsOfAccess'
+    ]
+
+    """
+    Dataset metadata attributes of Dataverse api upload inside
+    [\'datasetVersion\'][\'metadataBlocks\'][\'citation\'].
+    """
+    __attr_valid_metadata_citation_dicts = [
+        'title',
+        'subtitle',
+        'alternativeTitle',
+        'alternativeURL',
+        'subject',
+        'notesText',
+        'productionDate',
+        'productionPlace',
+        'distributionDate',
+        'depositor',
+        'dateOfDeposit',
+        'kindOfData',
+        'seriesName',
+        'seriesInformation',
+        'relatedMaterial',
+        'relatedDatasets',
+        'otherReferences',
+        'dataSources',
+        'originOfSources',
+        'characteristicOfSources',
+        'accessToSources'
+    ]
+
+    """
+    Dataset metadata attributes of Dataverse api upload inside
+    [\'datasetVersion\'][\'metadataBlocks\'][\'citation\'][\'fields\'].
+    """
+    __attr_valid_metadata_citation_arrays = {
+        'otherId': ['otherIdAgency', 'otherIdValue'],
+        'author': ['authorName', 'authorAffiliation', 'authorIdentifierScheme',
+                   'authorIdentifier'],
+        'datasetContact': ['datasetContactName', 'datasetContactAffiliation',
+                           'datasetContactEmail'],
+        'dsDescription': ['dsDescriptionValue', 'dsDescriptionDate'],
+        'keyword': ['keywordValue', 'keywordVocabulary',
+                    'keywordVocabularyURI'],
+        'producer': ['producerName', 'producerAffiliation',
+                     'producerAbbreviation', 'producerURL', 'producerLogoURL'],
+        'contributor': ['contributorType', 'contributorName'],
+        'grantNumber': ['grantNumberAgency', 'grantNumberValue'],
+        'topicClassification': ['topicClassValue', 'topicClassVocab'],
+        'publication': ['publicationCitation', 'publicationIDType',
+                        'publicationIDNumber', 'publicationURL'],
+        'distributor': ['distributorName', 'distributorAffiliation',
+                        'distributorAbbreviation', 'distributorURL',
+                        'distributorLogoURL'],
+        'timePeriodCovered': ['timePeriodCoveredStart',
+                              'timePeriodCoveredEnd'],
+        'dateOfCollection': ['dateOfCollectionStart', 'dateOfCollectionEnd'],
+        'software': ['softwareName', 'softwareVersion']
+    }
+
+    """
+    Dataset metadata attributes of Dataverse api upload inside
+    [\'datasetVersion\'][\'metadataBlocks\'][\'geospatial\'].
+    """
+    __attr_valid_metadata_geospatial_dicts = [
+        'geographicUnit'
+    ]
+
+    """
+    Dataset metadata attributes of Dataverse api upload inside
+    [\'datasetVersion\'][\'metadataBlocks\'][\'geospatial\'][\'fields\'].
+ """ + __attr_valid_metadata_geospatial_arrays = { + 'geographicCoverage': ['country', 'state', 'city', + 'otherGeographicCoverage'], + 'geographicBoundingBox': ['westLongitude', 'eastLongitude', + 'northLongitude', 'southLongitude'] + } + + """ + Dataset metadata attributes of Dataverse api upload inside + [\'datasetVersion\'][\'metadataBlocks\'][\'socialscience\']. + """ + __attr_valid_metadata_socialscience_dicts = [ + 'unitOfAnalysis', + 'universe', + 'timeMethod', + 'dataCollector', + 'collectorTraining', + 'frequencyOfDataCollection', + 'samplingProcedure', + 'deviationsFromSampleDesign', + 'collectionMode', + 'researchInstrument', + 'dataCollectionSituation', + 'actionsToMinimizeLoss', + 'controlOperations', + 'weighting', + 'cleaningOperations', + 'datasetLevelErrorNotes', + 'responseRate', + 'samplingErrorEstimates', + 'otherDataAppraisal', + ] + + """ + Dataset metadata attributes of Dataverse api upload inside + [\'datasetVersion\'][\'metadataBlocks\'][\'journal\']. + """ + __attr_valid_metadata_journal_dicts = [ + 'journalArticleType' + ] + + """ + Dataset metadata attributes of Dataverse api upload inside + [\'datasetVersion\'][\'metadataBlocks\'][\'journal\'][\'fields\']. + """ + __attr_valid_metadata_journal_arrays = { + 'journalVolumeIssue': ['journalVolume', 'journalIssue', + 'journalPubDate'] + } + + """Attributes valid for Dataset class.""" + __attr_valid_class = [ + 'datafiles' + ] + __attr_valid_metadata_datasetVersion \ + + __attr_valid_metadata_citation_dicts \ + + list(__attr_valid_metadata_citation_arrays.keys()) \ + + __attr_valid_metadata_geospatial_dicts \ + + list(__attr_valid_metadata_geospatial_arrays.keys()) \ + + __attr_valid_metadata_socialscience_dicts \ + + __attr_valid_metadata_journal_dicts \ + + list(__attr_valid_metadata_journal_arrays.keys()) \ + + def __init__(self): + """Init a Dataset() class. 
+
+        Examples
+        -------
+        Create a Dataset::
+
+            >>> from pyDataverse.models import Dataset
+            >>> ds = Dataset()
+
+        """
+        """Misc"""
+        self.datafiles = []
+
+        """Metadata: dataset"""
+        self.license = None
+        self.termsOfUse = None
+        self.termsOfAccess = None
+
+        """Metadata: citation"""
+        self.citation_displayName = None
+        self.title = None
+        self.subtitle = None
+        self.alternativeTitle = None
+        self.alternativeURL = None
+        self.otherId = []
+        self.author = []
+        self.datasetContact = []
+        self.dsDescription = []
+        self.subject = []
+        self.keyword = []
+        self.topicClassification = []
+        self.publication = []
+        self.notesText = None
+        self.producer = []
+        self.productionDate = None
+        self.productionPlace = None
+        self.contributor = []
+        self.grantNumber = []
+        self.distributor = []
+        self.distributionDate = None
+        self.depositor = None
+        self.dateOfDeposit = None
+        self.timePeriodCovered = []
+        self.dateOfCollection = []
+        self.kindOfData = []
+        self.seriesName = None
+        self.seriesInformation = None
+        self.software = []
+        self.relatedMaterial = []
+        self.relatedDatasets = []
+        self.otherReferences = []
+        self.dataSources = []
+        self.originOfSources = None
+        self.characteristicOfSources = None
+        self.accessToSources = None
+
+        """Metadata: geospatial"""
+        self.geospatial_displayName = None
+        self.geographicCoverage = []
+        self.geographicUnit = None
+        self.geographicBoundingBox = []
+
+        """Metadata: socialscience"""
+        self.socialscience_displayName = None
+        self.unitOfAnalysis = []
+        self.universe = []
+        self.timeMethod = None
+        self.dataCollector = None
+        self.collectorTraining = None
+        self.frequencyOfDataCollection = None
+        self.samplingProcedure = None
+        self.targetSampleActualSize = None
+        self.targetSampleSizeFormula = None
+        self.socialScienceNotesType = None
+        self.socialScienceNotesSubject = None
+        self.socialScienceNotesText = None
+        self.deviationsFromSampleDesign = None
+        self.collectionMode = None
+        self.researchInstrument = None
+        self.dataCollectionSituation = None
+        self.actionsToMinimizeLoss = None
+        self.controlOperations = None
+        self.weighting = None
+        self.cleaningOperations = None
+        self.datasetLevelErrorNotes = None
+        self.responseRate = None
+        self.samplingErrorEstimates = None
+        self.otherDataAppraisal = None
+
+        """Metadata: journal"""
+        self.journal_displayName = None
+        self.journalVolumeIssue = []
+        self.journalArticleType = None
+
+    def __str__(self):
+        """Return name of Dataset() class for users."""
+        return 'pyDataverse Dataset() model class.'
+
+    def set(self, data):
+        """Set class attributes with a flat dict as input.
+
+        Parameters
+        ----------
+        data : dict
+            Flat dict with data. Keys must be named the same as the class
+            attribute the data should be mapped to.
+
+        Examples
+        -------
+        Set Dataset attributes via flat dict::
+
+            >>> from pyDataverse.models import Dataset
+            >>> ds = Dataset()
+            >>> data = {
+            >>>     'title': 'pyDataverse study 2019',
+            >>>     'dsDescription': 'New study about pyDataverse usage in 2019'
+            >>> }
+            >>> ds.set(data)
+            >>> ds.title
+            'pyDataverse study 2019'
+
+        """
+        additional_attributes = [
+            'citation_displayName',
+            'geospatial_displayName',
+            'socialscience_displayName',
+            'journal_displayName',
+            'targetSampleActualSize',
+            'targetSampleSizeFormula',
+            'socialScienceNotesType',
+            'socialScienceNotesText',
+            'socialScienceNotesSubject'
+        ]
+        for key, val in data.items():
+            if key in self.__attr_valid_class or key in additional_attributes:
+                self.__setattr__(key, val)
+            else:
+                # TODO: Raise Exception
+                print('Key {0} not valid.'.format(key))
+
+    def import_metadata(self, filename, format='dv_up'):
+        """Import Dataset metadata from file.
+
+        Parameters
+        ----------
+        filename : string
+            Filename with full path.
+        format : string
+            Data format of input. Available formats are: `dv_up` for Dataverse
+            api upload compatible format.
+
+        Examples
+        -------
+        Import metadata from a json file::
+
+            >>> from pyDataverse.models import Dataset
+            >>> ds = Dataset()
+            >>> ds.import_metadata('tests/data/dataset_full.json')
+            >>> ds.title
+            'Replication Data for: Title'
+
+        """
+        data = {}
+        if format == 'dv_up':
+            metadata = read_file_json(filename)
+            """dataset"""
+            # get first level metadata and parse it automatically
+            for key, val in metadata['datasetVersion'].items():
+                if key in self.__attr_valid_metadata_datasetVersion:
+                    data[key] = val
+
+            # get nested metadata and parse it manually
+            if 'dataverseContacts' in metadata:
+                data['contactEmail'] = []
+                for contact in metadata['dataverseContacts']:
+                    for key, val in contact.items():
+                        if key == 'contactEmail':
+                            data['contactEmail'].append(val)
+
+            """citation"""
+            if 'citation' in metadata['datasetVersion']['metadataBlocks']:
+                citation = metadata['datasetVersion']['metadataBlocks']['citation']
+                if 'displayName' in citation:
+                    data['citation_displayName'] = citation['displayName']
+
+                for field in citation['fields']:
+                    if field['typeName'] in self.__attr_valid_metadata_citation_dicts:
+                        data[field['typeName']] = field['value']
+
+                    if field['typeName'] in self.__attr_valid_metadata_citation_arrays:
+                        data[field['typeName']] = self.__parse_dicts(
+                            field['value'],
+                            self.__attr_valid_metadata_citation_arrays[field['typeName']])
+
+                    if field['typeName'] == 'series':
+                        if 'seriesName' in field['value']:
+                            data['seriesName'] = field['value']['seriesName']['value']
+                        if 'seriesInformation' in field['value']:
+                            data['seriesInformation'] = field['value']['seriesInformation']['value']
+            else:
+                # TODO: Exception
+                print('citation not in json')
+
+            """geospatial"""
+            if 'geospatial' in metadata['datasetVersion']['metadataBlocks']:
+                geospatial = metadata['datasetVersion']['metadataBlocks']['geospatial']
+                if 'displayName' in geospatial:
+                    self.__setattr__('geospatial_displayName',
+                                     geospatial['displayName'])
+
+                for field in geospatial['fields']:
+                    if field['typeName'] in self.__attr_valid_metadata_geospatial_dicts:
+                        data[field['typeName']] = field['value']
+
+                    if field['typeName'] in self.__attr_valid_metadata_geospatial_arrays:
+                        data[field['typeName']] = self.__parse_dicts(
+                            field['value'],
+                            self.__attr_valid_metadata_geospatial_arrays[field['typeName']])
+            else:
+                # TODO: Exception
+                print('geospatial not in json')
+
+            """socialscience"""
+            if 'socialscience' in metadata['datasetVersion']['metadataBlocks']:
+                socialscience = metadata['datasetVersion']['metadataBlocks']['socialscience']
+                if 'displayName' in socialscience:
+                    self.__setattr__('socialscience_displayName',
+                                     socialscience['displayName'])
+
+                for field in socialscience['fields']:
+                    if field['typeName'] in self.__attr_valid_metadata_socialscience_dicts:
+                        data[field['typeName']] = field['value']
+
+                    if field['typeName'] == 'targetSampleSize':
+                        if 'targetSampleActualSize' in field['value']:
+                            data['targetSampleActualSize'] = field['value']['targetSampleActualSize']['value']
+                        if 'targetSampleSizeFormula' in field['value']:
+                            data['targetSampleSizeFormula'] = field['value']['targetSampleSizeFormula']['value']
+
+                    if field['typeName'] == 'socialScienceNotes':
+                        if 'socialScienceNotesType' in field['value']:
+                            data['socialScienceNotesType'] = field['value']['socialScienceNotesType']['value']
+                        if 'socialScienceNotesSubject' in field['value']:
+                            data['socialScienceNotesSubject'] = field['value']['socialScienceNotesSubject']['value']
+                        if 'socialScienceNotesText' in field['value']:
+                            data['socialScienceNotesText'] = field['value']['socialScienceNotesText']['value']
+            else:
+                # TODO: Exception
+                print('socialscience not in json')
+
+            """journal"""
+            if 'journal' in metadata['datasetVersion']['metadataBlocks']:
+                journal = metadata['datasetVersion']['metadataBlocks']['journal']
+                if 'displayName' in journal:
+                    self.__setattr__('journal_displayName',
+                                     journal['displayName'])
+
+                for field in journal['fields']:
+                    if field['typeName'] in self.__attr_valid_metadata_journal_dicts:
+                        data[field['typeName']] = field['value']
+
+                    if field['typeName'] in self.__attr_valid_metadata_journal_arrays:
+                        data[field['typeName']] = self.__parse_dicts(
+                            field['value'],
+                            self.__attr_valid_metadata_journal_arrays[field['typeName']])
+            else:
+                # TODO: Exception
+                print('journal not in json')
+
+            self.set(data)
+        elif format == 'dv_down':
+            metadata = read_file_json(filename)
+            self.set(data)
+        else:
+            # TODO: Exception
+            print('Data-format not right')
+
+    def __parse_dicts(self, data, attr_list):
+        """Parse out Dataverse api metadata dicts.
+
+        Parameters
+        ----------
+        data : list
+            List of Dataverse api metadata fields.
+        attr_list : list
+            List of attributes to be parsed.
+
+        Returns
+        -------
+        list
+            List of dicts with parsed out key-value pairs.
+
+        """
+        data_tmp = []
+
+        for d in data:
+            tmp_dict = {}
+            for key, val in d.items():
+                if key in attr_list:
+                    tmp_dict[key] = val['value']
+                else:
+                    print('Key \'{0}\' not in attribute list'.format(key))
+            data_tmp.append(tmp_dict)
+
+        return data_tmp
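Before generating upload JSON, a Dataset is typically filled and then validated; a sketch using the same sample values as the docstrings (`is_valid()` is defined just below):

```python
from pyDataverse.models import Dataset

ds = Dataset()
ds.set({
    'title': 'pyDataverse study 2019',
    'dsDescription': 'New study about pyDataverse usage in 2019'
})
# Required attributes can also be set directly.
ds.author = [{'authorName': 'LastAuthor1, FirstAuthor1'}]
ds.datasetContact = [{'datasetContactName': 'LastContact1, FirstContact1'}]
ds.subject = ['Engineering']
print(ds.is_valid())  # True once all required attributes are set
```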
+ + """ + is_valid = True + + # check if all required attributes are set + for attr in self.__attr_required_metadata: + if not self.__getattribute__(attr): + is_valid = False + print('Metadata not valid: attribute \'{0}\' missing.'.format(attr)) + + # check if attribute sets are complete where necessary + tp_cov = self.__getattribute__('timePeriodCovered') + if tp_cov: + for tp in tp_cov: + if 'timePeriodCoveredStart' in tp or 'timePeriodCoveredEnd' in tp: + if not ('timePeriodCoveredStart' in tp and 'timePeriodCoveredEnd' in tp): + is_valid = False + + d_coll = self.__getattribute__('dateOfCollection') + if d_coll: + for d in d_coll: + if 'dateOfCollectionStart' in d or 'dateOfCollectionEnd' in d: + if not ('dateOfCollectionStart' in d and 'dateOfCollectionEnd' in d): + is_valid = False + + authors = self.__getattribute__('author') + if authors: + for a in authors: + if 'authorAffiliation' in a or 'authorIdentifierScheme' in a or 'authorIdentifier' in a: + if 'authorName' not in a: + is_valid = False + + ds_contac = self.__getattribute__('datasetContact') + if ds_contac: + for c in ds_contac: + if 'datasetContactAffiliation' in c or 'datasetContactEmail' in c: + if 'datasetContactName' not in c: + is_valid = False + + producer = self.__getattribute__('producer') + if producer: + for p in producer: + if 'producerAffiliation' in p or 'producerAbbreviation' in p or 'producerURL' in p or 'producerLogoURL' in p: + if not p['producerName']: + is_valid = False + + contributor = self.__getattribute__('contributor') + if contributor: + for c in contributor: + if 'contributorType' in c: + if 'contributorName' not in c: + is_valid = False + + distributor = self.__getattribute__('distributor') + if distributor: + for d in distributor: + if 'distributorAffiliation' in d or 'distributorAbbreviation' in d or 'distributorURL' in d or 'distributorLogoURL' in d: + if 'distributorName' not in d: + is_valid = False + + bbox = self.__getattribute__('geographicBoundingBox') + if bbox: + for b in bbox: + if b: + if not ('westLongitude' in b and 'eastLongitude' in b and 'northLongitude' in b and 'southLongitude' in b): + is_valid = False + + return is_valid + + def dict(self, format='dv_up'): + """Create dicts in different data formats. + + Parameters + ---------- + format : string + Data format for dict creation. Available formats are: `dv_up` with + all metadata for Dataverse api upload, and `all` with all attributes + set. + + Returns + ------- + dict + Data as dict. 
+ + Examples + ------- + Get dict of Dataverse metadata:: + + >>> from pyDataverse.models import Dataset + >>> ds = Dataset() + >>> data = { + >>> 'title': 'pyDataverse study 2019', + >>> 'dsDescription': 'New study about pyDataverse usage in 2019' + >>> } + >>> ds.set(data) + >>> data = dv.dict() + >>> data['title'] + 'pyDataverse study 2019' + + Todo + ------- + Validate standard + + """ + if format == 'dv_up': + if self.is_valid(): + data = {} + data['datasetVersion'] = {} + data['datasetVersion']['metadataBlocks'] = {} + citation = {} + citation['fields'] = [] + geospatial = {} + geospatial['fields'] = [] + socialscience = {} + socialscience['fields'] = [] + journal = {} + journal['fields'] = [] + + """dataset""" + # Generate first level attributes + for attr in self.__attr_valid_metadata_datasetVersion: + if self.__getattribute__(attr) is not None: + data['datasetVersion'][attr] = self.__getattribute__(attr) + + """citation""" + if self.citation_displayName: + citation['displayName'] = self.citation_displayName + + # Generate first level attributes + for attr in self.__attr_valid_metadata_citation_dicts: + if self.__getattribute__(attr) is not None: + citation['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) + + # Generate fields attributes + for key, val in self.__attr_valid_metadata_citation_arrays.items(): + if self.__getattribute__(key) is not None: + citation['fields'].append({ + 'typeName': key, + 'value': self.__generate_dicts(key, val) + }) + + # Generate series attributes + if self.__getattribute__('series') is not None: + tmp_dict = {} + tmp_dict['value'] = {} + if 'seriesName' in self.__getattribute__('series'): + if self.__getattribute__('seriesName') is not None: + tmp_dict['value']['seriesName'] = {} + tmp_dict['value']['seriesName']['typeName'] = 'seriesName' + tmp_dict['value']['seriesName']['value'] = self.__getattribute__('seriesName') + if 'seriesInformation' in self.__getattribute__('series'): + if self.__getattribute__('seriesInformation') is not None: + tmp_dict['value']['seriesInformation'] = {} + tmp_dict['value']['seriesInformation']['typeName'] = 'seriesInformation' + tmp_dict['value']['seriesInformation']['value'] = self.__getattribute__('seriesInformation') + citation['fields'].append({ + 'typeName': 'series', + 'value': tmp_dict + }) + + """geospatial""" + # Generate first level attributes + for attr in self.__attr_valid_metadata_geospatial_dicts: + if self.__getattribute__(attr) is not None: + geospatial['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) + + # Generate fields attributes + for key, val in self.__attr_valid_metadata_geospatial_arrays.items(): + # check if attribute exists + if self.__getattribute__(key) is not None: + geospatial['fields'].append({ + 'typeName': key, + 'value': self.__generate_dicts(key, val) + }) + + """socialscience""" + # Generate first level attributes + for attr in self.__attr_valid_metadata_socialscience_dicts: + if self.__getattribute__(attr) is not None: + socialscience['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) + + # Generate targetSampleSize attributes + if self.__getattribute__('targetSampleSize') is not None: + tmp_dict = {} + tmp_dict['value'] = {} + if 'targetSampleActualSize' in self.__getattribute__('targetSampleSize'): + if self.__getattribute__('targetSampleActualSize') is not None: + tmp_dict['value']['targetSampleActualSize'] = {} + tmp_dict['value']['targetSampleActualSize']['typeName'] = 
'targetSampleActualSize'
+                            tmp_dict['value']['targetSampleActualSize']['value'] = self.__getattribute__('targetSampleActualSize')
+                if 'targetSampleSizeFormula' in self.__getattribute__('targetSampleSize'):
+                    if self.__getattribute__('targetSampleSizeFormula') is not None:
+                        tmp_dict['value']['targetSampleSizeFormula'] = {}
+                        tmp_dict['value']['targetSampleSizeFormula']['typeName'] = 'targetSampleSizeFormula'
+                        tmp_dict['value']['targetSampleSizeFormula']['value'] = self.__getattribute__('targetSampleSizeFormula')
+                socialscience['fields'].append({
+                    'typeName': 'targetSampleSize',
+                    'value': tmp_dict
+                })
+
+            # Generate socialScienceNotes attributes
+            if self.__getattribute__('socialScienceNotes') is not None:
+                tmp_dict = {}
+                tmp_dict['value'] = {}
+                if 'socialScienceNotesType' in self.__getattribute__('socialScienceNotes'):
+                    if self.__getattribute__('socialScienceNotesType') is not None:
+                        tmp_dict['value']['socialScienceNotesType'] = {}
+                        tmp_dict['value']['socialScienceNotesType']['typeName'] = 'socialScienceNotesType'
+                        tmp_dict['value']['socialScienceNotesType']['value'] = self.__getattribute__('socialScienceNotesType')
+                if 'socialScienceNotesSubject' in self.__getattribute__('socialScienceNotes'):
+                    if self.__getattribute__('socialScienceNotesSubject') is not None:
+                        tmp_dict['value']['socialScienceNotesSubject'] = {}
+                        tmp_dict['value']['socialScienceNotesSubject']['typeName'] = 'socialScienceNotesSubject'
+                        tmp_dict['value']['socialScienceNotesSubject']['value'] = self.__getattribute__('socialScienceNotesSubject')
+                if 'socialScienceNotesText' in self.__getattribute__('socialScienceNotes'):
+                    if self.__getattribute__('socialScienceNotesText') is not None:
+                        tmp_dict['value']['socialScienceNotesText'] = {}
+                        tmp_dict['value']['socialScienceNotesText']['typeName'] = 'socialScienceNotesText'
+                        tmp_dict['value']['socialScienceNotesText']['value'] = self.__getattribute__('socialScienceNotesText')
+                socialscience['fields'].append({
+                    'typeName': 'socialScienceNotes',
+                    'value': tmp_dict
+                })
+
+            """journal"""
+            # Generate first level attributes
+            for attr in self.__attr_valid_metadata_journal_dicts:
+                if self.__getattribute__(attr) is not None:
+                    journal['fields'].append({
+                        'typeName': attr,
+                        'value': self.__getattribute__(attr)
+                    })
+
+            # Generate fields attributes
+            for key, val in self.__attr_valid_metadata_journal_arrays.items():
+                if self.__getattribute__(key) is not None:
+                    journal['fields'].append({
+                        'typeName': key,
+                        'value': self.__generate_dicts(key, val)
+                    })
+
+            # TODO: check if all required attributes are set; if not, raise an exception.
+            data['datasetVersion']['metadataBlocks']['citation'] = citation
+            data['datasetVersion']['metadataBlocks']['socialscience'] = socialscience
+            data['datasetVersion']['metadataBlocks']['geospatial'] = geospatial
+            data['datasetVersion']['metadataBlocks']['journal'] = journal
+
+            return data
+        else:
+            print('dict can not be created. Data is not valid for format')
+            return None
+    elif format == 'all':
+        data = {}
+        for attr in self.__attr_valid_class:
+            if self.__getattribute__(attr) is not None:
+                data[attr] = self.__getattribute__(attr)
+        return data
+
+    else:
+        print('dict can not be created. Format is not valid')
+        return None
+
+    def __generate_dicts(self, key, val):
+        """Generate dicts for array attributes of Dataverse api metadata upload.
+
+        Parameters
+        ----------
+        key : string
+            Name of attribute.
+        val : list
+            List of sub-attribute names to parse out.
+
+        Returns
+        -------
+        list
+            List of filled dicts of metadata for Dataverse api upload.
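+
+        Examples
+        -------
+        Illustrative sketch of the conversion (a name-mangled call, since
+        the method is private)::
+
+            >>> ds = Dataset()
+            >>> ds.author = [{'authorName': 'LastAuthor1, FirstAuthor1'}]
+            >>> ds._Dataset__generate_dicts('author', ['authorName'])
+            [{'authorName': {'typeName': 'authorName', 'value': 'LastAuthor1, FirstAuthor1'}}]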
+ + """ + # check if attribute exists + tmp_list = [] + if self.__getattribute__(key): + # loop over list of attribute dicts() + for d in self.__getattribute__(key): + tmp_dict = {} + # iterate over key-value pairs + for k, v in d.items(): + # check if key is in attribute list + if k in val: + tmp_dict[k] = {} + tmp_dict[k]['typeName'] = k + tmp_dict[k]['value'] = v + tmp_list.append(tmp_dict) + + return tmp_list + + def json(self, format='dv_up'): + """Create Dataset json from attributes. + + Parameters + ---------- + format : string + Data format of input. Available formats are: `dv_up` for Dataverse + Api upload compatible format and `all` with all attributes named in + `__attr_valid_class`. + + Returns + ------- + string + json-formatted string of Dataverse metadata for api upload. + + Examples + ------- + Get json of Dataverse api upload:: + + >>> from pyDataverse.models import Dataset + >>> ds = Dataset() + >>> data = { + >>> 'title': 'pyDataverse study 2019', + >>> 'dsDescription': 'New study about pyDataverse usage in 2019' + >>> 'author': [{'authorName': 'LastAuthor1, FirstAuthor1'}], + >>> 'datasetContact': [{'datasetContactName': 'LastContact1, FirstContact1'}], + >>> 'subject': ['Engineering'], + >>> } + >>> ds.set(data) + >>> data = ds.json() + + Todo + ------- + TODO: Validate standard + TODO: Link to default json file + + """ + if format == 'dv_up': + return dict_to_json(self.dict()) + elif format == 'all': + return dict_to_json(self.dict('all')) + else: + # TODO Exception + print('data format not valid.') + + def export_metadata(self, filename, format='dv_up'): + """Export Dataset metadata to Dataverse api upload json. + + Parameters + ---------- + filename : string + Filename with full path. + format : string + Data format for export. Available format is: `dv_up` with all + metadata for Dataverse api upload. + + Examples + ------- + Export metadata to json file:: + + >>> from pyDataverse.models import Dataset + >>> ds = Dataset() + >>> data = { + >>> 'title': 'pyDataverse study 2019', + >>> 'dsDescription': 'New study about pyDataverse usage in 2019' + >>> 'author': [{'authorName': 'LastAuthor1, FirstAuthor1'}], + >>> 'datasetContact': [{'datasetContactName': 'LastContact1, FirstContact1'}], + >>> 'subject': ['Engineering'], + >>> } + >>> ds.export_metadata('tests/data/export_dataset.json') + + """ + if format == 'dv_up': + return write_file_json(filename, self.dict()) + else: + # TODO: Exception + print('Data-format not right.') + + +class Datafile(object): + """Base class for the Datafile model. + + Parameters + ---------- + filename : string + Filename with full path. + pid : type + Description of parameter `pid` (the default is None). + + Attributes + ---------- + description : string + Description of datafile + restrict : bool + Unknown + __attr_required_metadata : list + List with required metadata. + __attr_valid_metadata : list + List with valid metadata for Dataverse api upload. + __attr_valid_class : list + List of all attributes. + pid + filename + + """ + + """Attributes required for Datafile metadata json.""" + __attr_required_metadata = [ + 'filename', + 'pid' + ] + + """Attributes on first level of Datafile metadata json.""" + __attr_valid_metadata = [ + 'description', + 'pid', + 'restrict' + ] + """Attributes on first level of Datafile metadata json.""" + __attr_valid_class = [ + 'filename' + ] + __attr_valid_metadata + + def __init__(self, filename=None, pid=None): + """Init a Datafile() class. 
+
+        Parameters
+        ----------
+        filename : string
+            Filename with full path.
+        pid : string
+            Persistent identifier, e.g. DOI.
+
+        Examples
+        -------
+        Create a Datafile::
+
+            >>> from pyDataverse.models import Datafile
+            >>> df = Datafile()
+            >>> print(df)
+            pyDataverse Datafile() model class.
+
+        """
+        """Misc"""
+        self.pid = pid
+        self.filename = filename
+
+        """Metadata"""
+        self.description = None
+        self.restrict = None
+
+    def __str__(self):
+        """Return name of Datafile() class for users."""
+        return 'pyDataverse Datafile() model class.'
+
+    def set(self, data):
+        """Set class attributes with a flat dict.
+
+        Parameters
+        ----------
+        data : dict
+            Flat dict with data. Keys must be named the same as the class
+            attribute the data should be mapped to.
+
+        Examples
+        -------
+        Set Datafile attributes via flat dict::
+
+            >>> from pyDataverse.models import Datafile
+            >>> df = Datafile()
+            >>> data = {
+            >>> 'pid': 'doi:10.11587/EVMUHP',
+            >>> 'description': 'Test file',
+            >>> 'filename': 'tests/data/datafile.txt'
+            >>> }
+            >>> df.set(data)
+            >>> df.pid
+            'doi:10.11587/EVMUHP'
+
+        """
+        for key, val in data.items():
+            if key in self.__attr_valid_class:
+                self.__setattr__(key, val)
+            else:
+                # TODO: Raise Exception
+                print('Key {0} not valid.'.format(key))
+
+    def is_valid(self):
+        """Check if set attributes are valid for Dataverse api metadata creation.
+
+        Returns
+        -------
+        bool
+            True, if creation of metadata json is possible. False, if not.
+
+        Examples
+        -------
+        Check if metadata is valid for Dataverse api upload::
+
+            >>> from pyDataverse.models import Datafile
+            >>> df = Datafile()
+            >>> data = {
+            >>> 'pid': 'doi:10.11587/EVMUHP',
+            >>> 'description': 'Test file',
+            >>> 'filename': 'tests/data/datafile.txt'
+            >>> }
+            >>> df.set(data)
+            >>> df.is_valid()
+            True
+            >>> df.filename = None
+            >>> df.is_valid()
+            False
+
+        """
+        is_valid = True
+
+        for attr in self.__attr_required_metadata:
+            if self.__getattribute__(attr) is None:
+                is_valid = False
+                print('attribute \'{0}\' missing.'.format(attr))
+
+        return is_valid
+
+    def dict(self, format='dv_up'):
+        """Create dict in different data formats.
+
+        Parameters
+        ----------
+        format : string
+            Data format for dict creation. Available formats are: `dv_up` with
+            all metadata for Dataverse api upload, and `all` with all attributes
+            set.
+
+        Returns
+        -------
+        dict
+            Data as dict.
+
+        Examples
+        -------
+        Get dict of Datafile metadata::
+
+            >>> from pyDataverse.models import Datafile
+            >>> df = Datafile()
+            >>> data = {
+            >>> 'pid': 'doi:10.11587/EVMUHP',
+            >>> 'description': 'Test file',
+            >>> 'filename': 'tests/data/datafile.txt'
+            >>> }
+            >>> df.set(data)
+            >>> data = df.dict()
+            >>> data['description']
+            'Test file'
+
+        Todo
+        -------
+        Validate standards.
+
+        """
+        data = {}
+        if format == 'dv_up':
+            if self.is_valid():
+                for attr in self.__attr_valid_metadata:
+                    if self.__getattribute__(attr) is not None:
+                        data[attr] = self.__getattribute__(attr)
+
+                return data
+            else:
+                print('dict can not be created. Data is not valid')
+                return None
+        elif format == 'all':
+            for attr in self.__attr_valid_class:
+                if self.__getattribute__(attr) is not None:
+                    data[attr] = self.__getattribute__(attr)
+            return data
+        else:
+            # TODO: Exception
+            print('Format not valid for dict creation.')
+            return None
+
+    def json(self, format='dv_up'):
+        r"""Create json from attributes.
+
+        Parameters
+        ----------
+        format : string
+            Data format of input. Available formats are: `dv_up` for Dataverse
+            Api upload compatible format and `all` with all attributes named in
+            `__attr_valid_class`.
+
+        Returns
+        -------
+        string
+            json-formatted string of Dataverse metadata for api upload.
+
+        Examples
+        -------
+        Get json of Datafile metadata for Dataverse api upload::
+
+            >>> from pyDataverse.models import Datafile
+            >>> df = Datafile()
+            >>> data = {
+            >>> 'pid': 'doi:10.11587/EVMUHP',
+            >>> 'description': 'Test file',
+            >>> 'filename': 'tests/data/datafile.txt'
+            >>> }
+            >>> df.set(data)
+            >>> df.json()
+
+        Todo
+        -------
+        Validate standards.
+        Link to default json file
+
+        """
+        if format == 'dv_up':
+            data = self.dict('dv_up')
+            if data:
+                return dict_to_json(data)
+            else:
+                print('Dict can not be created')
+                return None
+        elif format == 'all':
+            data = self.dict('all')
+            if data:
+                return dict_to_json(data)
+            else:
+                print('Dict can not be created')
+                return None
+        else:
+            # TODO Exception
+            print('data format not valid.')
+            return None
diff --git a/src/pyDataverse/utils.py b/src/pyDataverse/utils.py
index 0f07f77..2e787fa 100644
--- a/src/pyDataverse/utils.py
+++ b/src/pyDataverse/utils.py
@@ -1,6 +1,7 @@
 # !/usr/bin/env python
 # -*- coding: utf-8 -*-
-"""Find out more at https://github.com/AUSSDA/pyDataverse."""
+"""Dataverse utility functions."""
+import csv
 import json
@@ -45,7 +46,7 @@ def dict_to_json(data):
     """
     try:
-        return json.dumps(data, ensure_ascii=False, indent=2)
+        return json.dumps(data, ensure_ascii=True, indent=2)
     except Exception as e:
         raise e
@@ -118,7 +119,7 @@ def read_file_json(filename):
     """
     try:
-        return json.loads(read_file(filename, 'r'))
+        return json_to_dict(read_file(filename, 'r'))
     except Exception as e:
         raise e
@@ -138,3 +139,61 @@ def write_file_json(filename, data, mode='w'):
     """
     write_file(filename, dict_to_json(data), mode)
+
+
+def read_file_csv(filename):
+    """Read in CSV file.
+
+    See more at `csv.reader() <https://docs.python.org/3/library/csv.html#csv.reader>`_.
+
+    Parameters
+    ----------
+    filename : string
+        Full filename with path of file.
+
+    Returns
+    -------
+    list
+        List of rows, which can be iterated over. Each row is a list of the
+        column values as strings.
+
+    """
+    try:
+        with open(filename, newline='') as csvfile:
+            # materialize the rows before the file is closed
+            return list(csv.reader(csvfile, delimiter=',', quotechar='"'))
+    except Exception as e:
+        raise e
+
+
+def read_csv_to_dict(filename):
+    """Read in csv file and convert it into a list of dicts.
+
+    This offers an easy way to import CSV files with dataset metadata.
+
+    Assumptions:
+    1) The header row contains the column names, named after Dataverse's
+    dataset attribute standard naming convention.
+    2) One row contains one dataset
+
+    After the import, the created dict then can directly be used to set
+    Dataset() attributes via ``Dataset.set(data)``.
+
+    Parameters
+    ----------
+    filename : string
+        Filename with full path.
+
+    Returns
+    -------
+    list
+        List with one dict per row (=dataset). The keys of the dicts are named
+        after the column names, which must be named after the Dataverse
+        dataset metadata naming convention.
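+
+    Examples
+    -------
+    Illustrative sketch; assumes a CSV file whose header row holds Dataverse
+    attribute names (the filename is hypothetical)::
+
+        >>> from pyDataverse.models import Dataset
+        >>> data = read_csv_to_dict('tests/data/datasets.csv')
+        >>> ds = Dataset()
+        >>> ds.set(data[0])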
+ + """ + reader = csv.DictReader(open(filename), 'r') + data = [] + for row in reader: + data.append(dict(row)) + return data diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..4846bae --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,192 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +"""Find out more at https://github.com/AUSSDA/pyDataverse.""" +import json +import os +from pyDataverse.api import Api +import pytest + +TEST_DIR = os.path.dirname(os.path.realpath(__file__)) + + +@pytest.fixture(scope='module') +def api_connection(): + """Fixture, so set up an Api connection. + + Returns + ------- + Api + Api object. + + """ + api_token = os.environ['API_TOKEN'] + base_url = os.environ['BASE_URL'] + return Api(base_url, api_token) + + +def read_json(filename): + """Read in json file. + + Parameters + ---------- + filename : string + Filename with full path. + + Returns + ------- + dict + File content as dict. + + """ + return j2d(read_file(filename)) + + +def read_file(filename): + """Read in file. + + Parameters + ---------- + filename : string + Filename with full path. + + Returns + ------- + string + File content as string. + + """ + with open(filename, 'r') as f: + data = f.read() + return data + + +def write_file(filename, data): + """Write data to file. + + Parameters + ---------- + filename : string + Filename with full path. + data : string + File content as string. + + """ + with open(filename, 'w') as f: + f.write(data) + + +def write_json(filename, data): + """Write data to json file. + + Parameters + ---------- + filename : string + Filename with full path. + data : dict + File content as dict. + + """ + write_file(filename, d2j(data)) + + +def j2d(data): + """Convert json to dict. + + Parameters + ---------- + data : string + JSON-formatted string. + + Returns + ------- + dict + Data as dict. + + """ + return json.loads(data) + + +def d2j(data): + """Coinvert dict 2 json. + + Parameters + ---------- + data : dict + Data as dict. + + Returns + ------- + string + JSON-formatted string. + + """ + return json.dumps(data, ensure_ascii=False, indent=2) + + +@pytest.fixture +def import_dataverse_min_dict(): + """Import minimum Dataverse dict. + + Returns + ------- + dict + Minimum Dataverse metadata. + + """ + return read_json(TEST_DIR + '/data/dataverse_min.json') + + +@pytest.fixture +def import_dataset_min_dict(): + """Import dataset dict. + + Returns + ------- + dict + Dataset metadata. + + """ + data = { + 'license': 'CC0', + 'termsOfUse': 'CC0 Waiver', + 'termsOfAccess': 'Terms of Access', + 'citation_displayName': 'Citation Metadata', + 'title': 'Replication Data for: Title' + } + return data + + +@pytest.fixture +def import_datafile_min_dict(): + """Import minimum Datafile dict. + + Returns + ------- + dict + Minimum Datafile metadata. + + """ + data = { + 'pid': 'doi:10.11587/EVMUHP', + 'filename': 'tests/data/datafile.txt' + } + return data + + +@pytest.fixture +def import_datafile_full_dict(): + """Import full Datafile dict. + + Returns + ------- + dict + Full Datafile metadata. 
+ + """ + data = { + 'pid': 'doi:10.11587/EVMUHP', + 'filename': 'tests/data/datafile.txt', + 'description': 'Test datafile', + 'restrict': False + } + return data diff --git a/tests/data/create_dataverse_2.json b/tests/data/create_dataverse_2.json deleted file mode 100644 index 5439c55..0000000 --- a/tests/data/create_dataverse_2.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "alias": "test-pyDataverse-2", - "name": "Test pyDataverse 2", - "dataverseContacts": [ - { - "contactEmail": "info@aussda.at" - } - ] -} diff --git a/tests/data/create_dataverse_3.json b/tests/data/create_dataverse_3.json deleted file mode 100644 index 45d891b..0000000 --- a/tests/data/create_dataverse_3.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "alias": "test-pyDataverse-3", - "name": "Test pyDataverse 3", - "dataverseContacts": [ - { - "contactEmail": "info@aussda.at" - } - ] -} diff --git a/tests/data/dataset_full.json b/tests/data/dataset_full.json new file mode 100644 index 0000000..1a55795 --- /dev/null +++ b/tests/data/dataset_full.json @@ -0,0 +1,1139 @@ +{ + "datasetVersion": { + "license": "CC0", + "termsOfUse": "CC0 Waiver", + "termsOfAccess": "Terms of Access", + "metadataBlocks": { + "citation": { + "displayName": "Citation Metadata", + "fields": [ + { + "typeName": "title", + "multiple": false, + "typeClass": "primitive", + "value": "Replication Data for: Title" + }, + { + "typeName": "subtitle", + "multiple": false, + "typeClass": "primitive", + "value": "Subtitle" + }, + { + "typeName": "alternativeTitle", + "multiple": false, + "typeClass": "primitive", + "value": "Alternative Title" + }, + { + "typeName": "alternativeURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://AlternativeURL.org" + }, + { + "typeName": "otherId", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "otherIdAgency": { + "typeName": "otherIdAgency", + "multiple": false, + "typeClass": "primitive", + "value": "OtherIDAgency1" + }, + "otherIdValue": { + "typeName": "otherIdValue", + "multiple": false, + "typeClass": "primitive", + "value": "OtherIDIdentifier1" + } + } + ] + }, + { + "typeName": "author", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "authorName": { + "typeName": "authorName", + "multiple": false, + "typeClass": "primitive", + "value": "LastAuthor1, FirstAuthor1" + }, + "authorAffiliation": { + "typeName": "authorAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "AuthorAffiliation1" + }, + "authorIdentifierScheme": { + "typeName": "authorIdentifierScheme", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "ORCID" + }, + "authorIdentifier": { + "typeName": "authorIdentifier", + "multiple": false, + "typeClass": "primitive", + "value": "AuthorIdentifier1" + } + } + ] + }, + { + "typeName": "datasetContact", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "datasetContactName": { + "typeName": "datasetContactName", + "multiple": false, + "typeClass": "primitive", + "value": "LastContact1, FirstContact1" + }, + "datasetContactAffiliation": { + "typeName": "datasetContactAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "ContactAffiliation1" + }, + "datasetContactEmail": { + "typeName": "datasetContactEmail", + "multiple": false, + "typeClass": "primitive", + "value": "ContactEmail1@mailinator.com" + } + } + ] + }, + { + "typeName": "dsDescription", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "dsDescriptionValue": { + "typeName": "dsDescriptionValue", + "multiple": 
false, + "typeClass": "primitive", + "value": "DescriptionText2" + }, + "dsDescriptionDate": { + "typeName": "dsDescriptionDate", + "multiple": false, + "typeClass": "primitive", + "value": "1000-02-02" + } + } + ] + }, + { + "typeName": "subject", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Agricultural Sciences", + "Business and Management", + "Engineering", + "Law" + ] + }, + { + "typeName": "keyword", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "keywordValue": { + "typeName": "keywordValue", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordTerm1" + }, + "keywordVocabulary": { + "typeName": "keywordVocabulary", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordVocabulary1" + }, + "keywordVocabularyURI": { + "typeName": "keywordVocabularyURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://KeywordVocabularyURL1.org" + } + } + ] + }, + { + "typeName": "topicClassification", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "topicClassValue": { + "typeName": "topicClassValue", + "multiple": false, + "typeClass": "primitive", + "value": "Topic Class Value1" + }, + "topicClassVocab": { + "typeName": "topicClassVocab", + "multiple": false, + "typeClass": "primitive", + "value": "Topic Classification Vocabulary" + } + } + ] + }, + { + "typeName": "publication", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "publicationCitation": { + "typeName": "publicationCitation", + "multiple": false, + "typeClass": "primitive", + "value": "RelatedPublicationCitation1" + }, + "publicationIDType": { + "typeName": "publicationIDType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "ark" + }, + "publicationIDNumber": { + "typeName": "publicationIDNumber", + "multiple": false, + "typeClass": "primitive", + "value": "RelatedPublicationIDNumber1" + }, + "publicationURL": { + "typeName": "publicationURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://RelatedPublicationURL1.org" + } + } + ] + }, + { + "typeName": "notesText", + "multiple": false, + "typeClass": "primitive", + "value": "Notes1" + }, + { + "typeName": "producer", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "producerName": { + "typeName": "producerName", + "multiple": false, + "typeClass": "primitive", + "value": "LastProducer1, FirstProducer1" + }, + "producerAffiliation": { + "typeName": "producerAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "ProducerAffiliation1" + }, + "producerAbbreviation": { + "typeName": "producerAbbreviation", + "multiple": false, + "typeClass": "primitive", + "value": "ProducerAbbreviation1" + }, + "producerURL": { + "typeName": "producerURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://ProducerURL1.org" + }, + "producerLogoURL": { + "typeName": "producerLogoURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://ProducerLogoURL1.org" + } + } + ] + }, + { + "typeName": "productionDate", + "multiple": false, + "typeClass": "primitive", + "value": "1003-01-01" + }, + { + "typeName": "productionPlace", + "multiple": false, + "typeClass": "primitive", + "value": "ProductionPlace" + }, + { + "typeName": "contributor", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "contributorType": { + "typeName": "contributorType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Data Collector" + }, + "contributorName": { 
+ "typeName": "contributorName", + "multiple": false, + "typeClass": "primitive", + "value": "LastContributor1, FirstContributor1" + } + } + ] + }, + { + "typeName": "grantNumber", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "grantNumberAgency": { + "typeName": "grantNumberAgency", + "multiple": false, + "typeClass": "primitive", + "value": "GrantInformationGrantAgency1" + }, + "grantNumberValue": { + "typeName": "grantNumberValue", + "multiple": false, + "typeClass": "primitive", + "value": "GrantInformationGrantNumber1" + } + } + ] + }, + { + "typeName": "distributor", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "distributorName": { + "typeName": "distributorName", + "multiple": false, + "typeClass": "primitive", + "value": "LastDistributor1, FirstDistributor1" + }, + "distributorAffiliation": { + "typeName": "distributorAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "DistributorAffiliation1" + }, + "distributorAbbreviation": { + "typeName": "distributorAbbreviation", + "multiple": false, + "typeClass": "primitive", + "value": "DistributorAbbreviation1" + }, + "distributorURL": { + "typeName": "distributorURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://DistributorURL1.org" + }, + "distributorLogoURL": { + "typeName": "distributorLogoURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://DistributorLogoURL1.org" + } + } + ] + }, + { + "typeName": "distributionDate", + "multiple": false, + "typeClass": "primitive", + "value": "1004-01-01" + }, + { + "typeName": "depositor", + "multiple": false, + "typeClass": "primitive", + "value": "LastDepositor, FirstDepositor" + }, + { + "typeName": "dateOfDeposit", + "multiple": false, + "typeClass": "primitive", + "value": "1002-01-01" + }, + { + "typeName": "timePeriodCovered", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "timePeriodCoveredStart": { + "typeName": "timePeriodCoveredStart", + "multiple": false, + "typeClass": "primitive", + "value": "1005-01-01" + }, + "timePeriodCoveredEnd": { + "typeName": "timePeriodCoveredEnd", + "multiple": false, + "typeClass": "primitive", + "value": "1005-01-02" + } + } + ] + }, + { + "typeName": "dateOfCollection", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "dateOfCollectionStart": { + "typeName": "dateOfCollectionStart", + "multiple": false, + "typeClass": "primitive", + "value": "1006-01-01" + }, + "dateOfCollectionEnd": { + "typeName": "dateOfCollectionEnd", + "multiple": false, + "typeClass": "primitive", + "value": "1006-01-01" + } + } + ] + }, + { + "typeName": "kindOfData", + "multiple": true, + "typeClass": "primitive", + "value": [ + "KindOfData1", + "KindOfData2" + ] + }, + { + "typeName": "series", + "multiple": false, + "typeClass": "compound", + "value": { + "seriesName": { + "typeName": "seriesName", + "multiple": false, + "typeClass": "primitive", + "value": "SeriesName" + }, + "seriesInformation": { + "typeName": "seriesInformation", + "multiple": false, + "typeClass": "primitive", + "value": "SeriesInformation" + } + } + }, + { + "typeName": "software", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "softwareName": { + "typeName": "softwareName", + "multiple": false, + "typeClass": "primitive", + "value": "SoftwareName1" + }, + "softwareVersion": { + "typeName": "softwareVersion", + "multiple": false, + "typeClass": "primitive", + "value": "SoftwareVersion1" + } + } + ] + }, + { + "typeName": "relatedMaterial", + 
"multiple": true, + "typeClass": "primitive", + "value": [ + "RelatedMaterial1", + "RelatedMaterial2" + ] + }, + { + "typeName": "relatedDatasets", + "multiple": true, + "typeClass": "primitive", + "value": [ + "RelatedDatasets1", + "RelatedDatasets2" + ] + }, + { + "typeName": "otherReferences", + "multiple": true, + "typeClass": "primitive", + "value": [ + "OtherReferences1", + "OtherReferences2" + ] + }, + { + "typeName": "dataSources", + "multiple": true, + "typeClass": "primitive", + "value": [ + "DataSources1", + "DataSources2" + ] + }, + { + "typeName": "originOfSources", + "multiple": false, + "typeClass": "primitive", + "value": "OriginOfSources" + }, + { + "typeName": "characteristicOfSources", + "multiple": false, + "typeClass": "primitive", + "value": "CharacteristicOfSourcesNoted" + }, + { + "typeName": "accessToSources", + "multiple": false, + "typeClass": "primitive", + "value": "DocumentationAndAccessToSources" + } + ] + }, + "geospatial": { + "displayName": "Geospatial Metadata", + "fields": [ + { + "typeName": "geographicCoverage", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "country": { + "typeName": "country", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Afghanistan" + }, + "state": { + "typeName": "state", + "multiple": false, + "typeClass": "primitive", + "value": "GeographicCoverageStateProvince1" + }, + "city": { + "typeName": "city", + "multiple": false, + "typeClass": "primitive", + "value": "GeographicCoverageCity1" + }, + "otherGeographicCoverage": { + "typeName": "otherGeographicCoverage", + "multiple": false, + "typeClass": "primitive", + "value": "GeographicCoverageOther1" + } + } + ] + }, + { + "typeName": "geographicUnit", + "multiple": true, + "typeClass": "primitive", + "value": [ + "GeographicUnit1", + "GeographicUnit2" + ] + }, + { + "typeName": "geographicBoundingBox", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "westLongitude": { + "typeName": "westLongitude", + "multiple": false, + "typeClass": "primitive", + "value": "10" + }, + "eastLongitude": { + "typeName": "eastLongitude", + "multiple": false, + "typeClass": "primitive", + "value": "20" + }, + "northLongitude": { + "typeName": "northLongitude", + "multiple": false, + "typeClass": "primitive", + "value": "30" + }, + "southLongitude": { + "typeName": "southLongitude", + "multiple": false, + "typeClass": "primitive", + "value": "40" + } + } + ] + } + ] + }, + "socialscience": { + "displayName": "Social Science and Humanities Metadata", + "fields": [ + { + "typeName": "unitOfAnalysis", + "multiple": true, + "typeClass": "primitive", + "value": [ + "UnitOfAnalysis1", + "UnitOfAnalysis2" + ] + }, + { + "typeName": "universe", + "multiple": true, + "typeClass": "primitive", + "value": [ + "Universe1", + "Universe2" + ] + }, + { + "typeName": "timeMethod", + "multiple": false, + "typeClass": "primitive", + "value": "TimeMethod" + }, + { + "typeName": "dataCollector", + "multiple": false, + "typeClass": "primitive", + "value": "LastDataCollector1, FirstDataCollector1" + }, + { + "typeName": "collectorTraining", + "multiple": false, + "typeClass": "primitive", + "value": "CollectorTraining" + }, + { + "typeName": "frequencyOfDataCollection", + "multiple": false, + "typeClass": "primitive", + "value": "Frequency" + }, + { + "typeName": "samplingProcedure", + "multiple": false, + "typeClass": "primitive", + "value": "SamplingProcedure" + }, + { + "typeName": "targetSampleSize", + "multiple": false, + "typeClass": "compound", + "value": 
{ + "targetSampleActualSize": { + "typeName": "targetSampleActualSize", + "multiple": false, + "typeClass": "primitive", + "value": "100" + }, + "targetSampleSizeFormula": { + "typeName": "targetSampleSizeFormula", + "multiple": false, + "typeClass": "primitive", + "value": "TargetSampleSizeFormula" + } + } + }, + { + "typeName": "deviationsFromSampleDesign", + "multiple": false, + "typeClass": "primitive", + "value": "MajorDeviationsForSampleDesign" + }, + { + "typeName": "collectionMode", + "multiple": false, + "typeClass": "primitive", + "value": "CollectionMode" + }, + { + "typeName": "researchInstrument", + "multiple": false, + "typeClass": "primitive", + "value": "TypeOfResearchInstrument" + }, + { + "typeName": "dataCollectionSituation", + "multiple": false, + "typeClass": "primitive", + "value": "CharacteristicsOfDataCollectionSituation" + }, + { + "typeName": "actionsToMinimizeLoss", + "multiple": false, + "typeClass": "primitive", + "value": "ActionsToMinimizeLosses" + }, + { + "typeName": "controlOperations", + "multiple": false, + "typeClass": "primitive", + "value": "ControlOperations" + }, + { + "typeName": "weighting", + "multiple": false, + "typeClass": "primitive", + "value": "Weighting" + }, + { + "typeName": "cleaningOperations", + "multiple": false, + "typeClass": "primitive", + "value": "CleaningOperations" + }, + { + "typeName": "datasetLevelErrorNotes", + "multiple": false, + "typeClass": "primitive", + "value": "StudyLevelErrorNotes" + }, + { + "typeName": "responseRate", + "multiple": false, + "typeClass": "primitive", + "value": "ResponseRate" + }, + { + "typeName": "samplingErrorEstimates", + "multiple": false, + "typeClass": "primitive", + "value": "EstimatesOfSamplingError" + }, + { + "typeName": "otherDataAppraisal", + "multiple": false, + "typeClass": "primitive", + "value": "OtherFormsOfDataAppraisal" + }, + { + "typeName": "socialScienceNotes", + "multiple": false, + "typeClass": "compound", + "value": { + "socialScienceNotesType": { + "typeName": "socialScienceNotesType", + "multiple": false, + "typeClass": "primitive", + "value": "NotesType" + }, + "socialScienceNotesSubject": { + "typeName": "socialScienceNotesSubject", + "multiple": false, + "typeClass": "primitive", + "value": "NotesSubject" + }, + "socialScienceNotesText": { + "typeName": "socialScienceNotesText", + "multiple": false, + "typeClass": "primitive", + "value": "NotesText" + } + } + } + ] + }, + "astrophysics": { + "displayName": "Astronomy and Astrophysics Metadata", + "fields": [ + { + "typeName": "astroType", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Image", + "Mosaic", + "EventList", + "Cube" + ] + }, + { + "typeName": "astroFacility", + "multiple": true, + "typeClass": "primitive", + "value": [ + "Facility1", + "Facility2" + ] + }, + { + "typeName": "astroInstrument", + "multiple": true, + "typeClass": "primitive", + "value": [ + "Instrument1", + "Instrument2" + ] + }, + { + "typeName": "astroObject", + "multiple": true, + "typeClass": "primitive", + "value": [ + "Object1", + "Object2" + ] + }, + { + "typeName": "resolution.Spatial", + "multiple": false, + "typeClass": "primitive", + "value": "SpatialResolution" + }, + { + "typeName": "resolution.Spectral", + "multiple": false, + "typeClass": "primitive", + "value": "SpectralResolution" + }, + { + "typeName": "resolution.Temporal", + "multiple": false, + "typeClass": "primitive", + "value": "TimeResolution" + }, + { + "typeName": "coverage.Spectral.Bandpass", + "multiple": true, + "typeClass": 
"primitive", + "value": [ + "Bandpass1", + "Bandpass2" + ] + }, + { + "typeName": "coverage.Spectral.CentralWavelength", + "multiple": true, + "typeClass": "primitive", + "value": [ + "3001", + "3002" + ] + }, + { + "typeName": "coverage.Spectral.Wavelength", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "coverage.Spectral.MinimumWavelength": { + "typeName": "coverage.Spectral.MinimumWavelength", + "multiple": false, + "typeClass": "primitive", + "value": "4001" + }, + "coverage.Spectral.MaximumWavelength": { + "typeName": "coverage.Spectral.MaximumWavelength", + "multiple": false, + "typeClass": "primitive", + "value": "4002" + } + }, + { + "coverage.Spectral.MinimumWavelength": { + "typeName": "coverage.Spectral.MinimumWavelength", + "multiple": false, + "typeClass": "primitive", + "value": "4003" + }, + "coverage.Spectral.MaximumWavelength": { + "typeName": "coverage.Spectral.MaximumWavelength", + "multiple": false, + "typeClass": "primitive", + "value": "4004" + } + } + ] + }, + { + "typeName": "coverage.Temporal", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "coverage.Temporal.StartTime": { + "typeName": "coverage.Temporal.StartTime", + "multiple": false, + "typeClass": "primitive", + "value": "1007-01-01" + }, + "coverage.Temporal.StopTime": { + "typeName": "coverage.Temporal.StopTime", + "multiple": false, + "typeClass": "primitive", + "value": "1007-01-02" + } + }, + { + "coverage.Temporal.StartTime": { + "typeName": "coverage.Temporal.StartTime", + "multiple": false, + "typeClass": "primitive", + "value": "1007-02-01" + }, + "coverage.Temporal.StopTime": { + "typeName": "coverage.Temporal.StopTime", + "multiple": false, + "typeClass": "primitive", + "value": "1007-02-02" + } + } + ] + }, + { + "typeName": "coverage.Spatial", + "multiple": true, + "typeClass": "primitive", + "value": [ + "SkyCoverage1", + "SkyCoverage2" + ] + }, + { + "typeName": "coverage.Depth", + "multiple": false, + "typeClass": "primitive", + "value": "200" + }, + { + "typeName": "coverage.ObjectDensity", + "multiple": false, + "typeClass": "primitive", + "value": "300" + }, + { + "typeName": "coverage.ObjectCount", + "multiple": false, + "typeClass": "primitive", + "value": "400" + }, + { + "typeName": "coverage.SkyFraction", + "multiple": false, + "typeClass": "primitive", + "value": "500" + }, + { + "typeName": "coverage.Polarization", + "multiple": false, + "typeClass": "primitive", + "value": "Polarization" + }, + { + "typeName": "redshiftType", + "multiple": false, + "typeClass": "primitive", + "value": "RedshiftType" + }, + { + "typeName": "resolution.Redshift", + "multiple": false, + "typeClass": "primitive", + "value": "600" + }, + { + "typeName": "coverage.RedshiftValue", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "coverage.Redshift.MinimumValue": { + "typeName": "coverage.Redshift.MinimumValue", + "multiple": false, + "typeClass": "primitive", + "value": "701" + }, + "coverage.Redshift.MaximumValue": { + "typeName": "coverage.Redshift.MaximumValue", + "multiple": false, + "typeClass": "primitive", + "value": "702" + } + } + ] + } + ] + }, + "biomedical": { + "displayName": "Life Sciences Metadata", + "fields": [ + { + "typeName": "studyDesignType", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Case Control", + "Cross Sectional", + "Cohort Study", + "Not Specified" + ] + }, + { + "typeName": "studyFactorType", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Age", + "Biomarkers", + 
"Cell Surface Markers", + "Developmental Stage" + ] + }, + { + "typeName": "studyAssayOrganism", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Arabidopsis thaliana", + "Bos taurus", + "Caenorhabditis elegans", + "Danio rerio (zebrafish)" + ] + }, + { + "typeName": "studyAssayOtherOrganism", + "multiple": true, + "typeClass": "primitive", + "value": [ + "OtherOrganism1", + "OtherOrganism2" + ] + }, + { + "typeName": "studyAssayMeasurementType", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "cell counting", + "cell sorting", + "clinical chemistry analysis", + "DNA methylation profiling" + ] + }, + { + "typeName": "studyAssayOtherMeasurmentType", + "multiple": true, + "typeClass": "primitive", + "value": [ + "OtherMeasurementType1", + "OtherMeasurementType2" + ] + }, + { + "typeName": "studyAssayTechnologyType", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "culture based drug susceptibility testing, single concentration", + "culture based drug susceptibility testing, two concentrations", + "culture based drug susceptibility testing, three or more concentrations (minimium inhibitory concentration measurement)", + "flow cytometry" + ] + }, + { + "typeName": "studyAssayPlatform", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "210-MS GC Ion Trap (Varian)", + "220-MS GC Ion Trap (Varian)", + "225-MS GC Ion Trap (Varian)", + "300-MS quadrupole GC/MS (Varian)" + ] + }, + { + "typeName": "studyAssayCellType", + "multiple": true, + "typeClass": "primitive", + "value": [ + "CellType1", + "CellType2" + ] + } + ] + }, + "journal": { + "displayName": "Journal Metadata", + "fields": [ + { + "typeName": "journalVolumeIssue", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "journalVolume": { + "typeName": "journalVolume", + "multiple": false, + "typeClass": "primitive", + "value": "JournalVolume1" + }, + "journalIssue": { + "typeName": "journalIssue", + "multiple": false, + "typeClass": "primitive", + "value": "JournalIssue1" + }, + "journalPubDate": { + "typeName": "journalPubDate", + "multiple": false, + "typeClass": "primitive", + "value": "1008-01-01" + } + } + ] + }, + { + "typeName": "journalArticleType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "abstract" + } + ] + } + } + } +} diff --git a/tests/data/create_dataset.json b/tests/data/dataset_min.json similarity index 100% rename from tests/data/create_dataset.json rename to tests/data/dataset_min.json diff --git a/tests/data/create_dataverse.json b/tests/data/dataverse_min.json similarity index 100% rename from tests/data/create_dataverse.json rename to tests/data/dataverse_min.json diff --git a/tests/data/add-user.json b/tests/data/user.json similarity index 100% rename from tests/data/add-user.json rename to tests/data/user.json diff --git a/tests/test_api.py b/tests/test_api.py index 616356d..73f08e4 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,26 +1,17 @@ # coding: utf-8 from datetime import datetime -from datetime import timedelta import os from pyDataverse.api import Api from pyDataverse.exceptions import ApiResponseError from pyDataverse.exceptions import ApiUrlError +from pyDataverse.utils import dict_to_json import pytest from requests import Response from time import sleep TEST_DIR = os.path.dirname(os.path.realpath(__file__)) -SLEEP_TIME = 1 - -if 'API_TOKEN' in os.environ: - API_TOKEN = os.environ['API_TOKEN'] -else: - print('ERROR: Environment variable API_TOKEN 
for test missing.') -if 'BASE_URL' in os.environ: - BASE_URL = os.environ['BASE_URL'] -else: - print('ERROR: Environment variable BASE_URL for test missing.') +SLEEP_TIME = 0.1 class TestApiConnect(object): @@ -28,17 +19,16 @@ class TestApiConnect(object): def test_api_connect(self): """Test successfull connection without api_token.""" - api = Api(BASE_URL) + api = Api(os.environ['BASE_URL']) sleep(SLEEP_TIME) - time_window_start = datetime.now() - timedelta(seconds=10) + assert isinstance(api, Api) assert not api.api_token assert api.api_version == 'v1' - assert api.conn_started > time_window_start assert isinstance(api.conn_started, datetime) - assert api.base_url == BASE_URL + assert api.base_url == os.environ['BASE_URL'] assert api.native_api_base_url == '{0}/api/{1}'.format( - BASE_URL, api.api_version) + os.environ['BASE_URL'], api.api_version) assert api.status == 'OK' def test_api_connect_base_url_wrong(self): @@ -48,10 +38,9 @@ def test_api_connect_base_url_wrong(self): base_url = 'http://wikipedia.org' api = Api(base_url) sleep(SLEEP_TIME) - time_window_start = datetime.now() - timedelta(seconds=10) + assert not api.api_token assert api.api_version == 'v1' - assert api.conn_started > time_window_start assert api.base_url == 'http://wikipedia.org' assert api.native_api_base_url == 'http://wikipedia.org/api/v1' assert api.status == 'ERROR' @@ -61,10 +50,9 @@ def test_api_connect_base_url_wrong(self): base_url = None api = Api(base_url) sleep(SLEEP_TIME) - time_window_start = datetime.now() - timedelta(seconds=10) + assert not api.api_token assert api.api_version == 'v1' - assert api.conn_started > time_window_start assert not api.base_url assert not api.native_api_base_url assert api.status == 'ERROR' @@ -79,25 +67,72 @@ class TestApiRequests(object): def setup_class(cls): """Create the api connection for later use.""" cls.dataverse_id = 'test-pyDataverse' - cls.filename_dataverse = TEST_DIR+'/data/create_dataverse.json' - cls.filename_dataset = TEST_DIR+'/data/create_dataset.json' - cls.api = Api(BASE_URL, api_token=API_TOKEN) - sleep(SLEEP_TIME) - assert cls.api - assert cls.api.api_token - assert cls.api.base_url + cls.dataset_id = None + + def test_create_dataverse(self, import_dataverse_min_dict, api_connection): + """Test successfull `.create_dataverse()` request`.""" + if not os.environ.get('TRAVIS'): + api = api_connection + metadata = import_dataverse_min_dict + resp = api.create_dataverse( + self.dataverse_id, dict_to_json(metadata)) + sleep(SLEEP_TIME) + + assert isinstance(resp, Response) + assert api.get_dataverse(self.dataverse_id).json() + + def test_create_dataset(self, import_dataset_min_dict, api_connection): + """Test successfull `.create_dataset()` request`.""" + if not os.environ.get('TRAVIS'): + api = api_connection + metadata = import_dataset_min_dict + resp = api.create_dataset(':root', dict_to_json(metadata)) + sleep(SLEEP_TIME) + TestApiRequests.dataset_id = resp.json()['data']['persistentId'] + + assert isinstance(resp, Response) - def test_make_get_request(self): - """Test successfull `.make_get_request()` request.""" + def test_get_dataset(self, api_connection): + """Test successfull `.get_dataset()` request`.""" + if not os.environ.get('TRAVIS'): + api = api_connection + resp = api.get_dataset(TestApiRequests.dataset_id) + sleep(SLEEP_TIME) + + assert isinstance(resp, Response) + + def test_delete_dataset(self, api_connection): + """Test successfull `.delete_dataset()` request`.""" + if not os.environ.get('TRAVIS'): + api = api_connection + resp = 
api.delete_dataset(TestApiRequests.dataset_id) + sleep(SLEEP_TIME) + assert isinstance(resp, Response) + + def test_delete_dataverse(self, api_connection): + """Test successfull `.delete_dataverse()` request`.""" + if not os.environ.get('TRAVIS'): + api = api_connection + resp = api.delete_dataverse(self.dataverse_id) + sleep(SLEEP_TIME) + + assert isinstance(resp, Response) + + def test_get_request(self, api_connection): + """Test successfull `.get_request()` request.""" # TODO: test params und auth default + api = api_connection query_str = '/info/server' - resp = self.api.make_get_request(query_str) + resp = api.get_request(query_str) sleep(SLEEP_TIME) - assert self.api.status == 'OK' + + assert api.status == 'OK' assert isinstance(resp, Response) - def test_get_dataverse(self): + def test_get_dataverse(self, api_connection): """Test successfull `.get_dataverse()` request`.""" - resp = self.api.get_dataverse(':root') + api = api_connection + resp = api.get_dataverse(':root') sleep(SLEEP_TIME) + assert isinstance(resp, Response) diff --git a/tests/test_models_datafile.py b/tests/test_models_datafile.py new file mode 100644 index 0000000..cbe4f3e --- /dev/null +++ b/tests/test_models_datafile.py @@ -0,0 +1,228 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +"""Datafile data model tests.""" +import os +from pyDataverse.models import Datafile + +TEST_DIR = os.path.dirname(os.path.realpath(__file__)) + + +class TestDatafile(object): + """Tests for Datafile().""" + + def test_datafile_init(self): + """Test Datafile.__init__().""" + df = Datafile() + + assert not df.pid + assert not df.filename + assert not df.description + assert not df.restrict + + df = Datafile('tests/data/datafile.txt', 'doi:10.11587/EVMUHP') + + assert df.pid == 'doi:10.11587/EVMUHP' + assert df.filename == 'tests/data/datafile.txt' + assert not df.description + assert not df.restrict + + def test_datafile_set_dv_up(self, import_datafile_full_dict): + """Test Datafile.set() with format=`dv_up`. + + Parameters + ---------- + import_datafile_full_dict : dict + Fixture, which returns a flat datafile dict(). + + """ + data = import_datafile_full_dict + df = Datafile() + df.set(data) + + assert df.pid == 'doi:10.11587/EVMUHP' + assert df.filename == 'tests/data/datafile.txt' + assert df.description == 'Test datafile' + assert not df.restrict + + def test_datafile_is_valid_valid(self, import_datafile_full_dict): + """Test Datafile.is_valid() with valid data. + + Parameters + ---------- + import_datafile_min_dict : dict + Fixture, which returns a flat datafile dict(). + + """ + data = import_datafile_full_dict + df = Datafile() + df.set(data) + + assert df.pid == 'doi:10.11587/EVMUHP' + assert df.filename == 'tests/data/datafile.txt' + assert df.description == 'Test datafile' + assert not df.restrict + assert df.is_valid() + + def test_datafile_is_valid_not(self, import_datafile_full_dict): + """Test Datafile.is_valid() with non-valid data. + + Parameters + ---------- + import_datafile_min_dict : dict + Fixture, which returns a flat datafile dict(). + + """ + data = import_datafile_full_dict + df = Datafile() + df.set(data) + df.filename = None + + assert df.pid == 'doi:10.11587/EVMUHP' + assert not df.filename + assert df.description == 'Test datafile' + assert not df.restrict + assert not df.is_valid() + + def test_datafile_dict_dv_up_valid(self, import_datafile_full_dict): + """Test Datafile.dict() with format=`dv_up` and valid data. 
+ + Parameters + ---------- + import_datafile_min_dict : dict + Fixture, which returns a flat dataset dict(). + + """ + data = import_datafile_full_dict + df = Datafile() + df.set(data) + data = df.dict() + + assert df.dict('dv_up') + assert data + assert isinstance(data, dict) + assert data['pid'] == 'doi:10.11587/EVMUHP' + assert data['description'] == 'Test datafile' + print(data) + assert not data['restrict'] + + def test_datafile_dict_all_valid(self, import_datafile_full_dict): + """Test Datafile.dict() with format=`all` and valid data. + + Parameters + ---------- + import_datafile_full_dict : dict + Fixture, which returns a flat dataset dict(). + + """ + data = import_datafile_full_dict + df = Datafile() + df.set(data) + data = df.dict('all') + + assert data + assert isinstance(data, dict) + assert data['pid'] == 'doi:10.11587/EVMUHP' + assert data['filename'] == 'tests/data/datafile.txt' + assert data['description'] == 'Test datafile' + assert not data['restrict'] + + def test_datafile_dict_format_wrong(self, import_datafile_full_dict): + """Test Datafile.dict() with non-valid format. + + Parameters + ---------- + import_datafile_min_dict : dict + Fixture, which returns a flat dataset dict(). + + """ + data = import_datafile_full_dict + df = Datafile() + df.set(data) + data = df.dict('wrong') + + assert not data + + def test_datafile_dict_dv_up_valid_not(self, import_datafile_min_dict): + """Test Datafile.dict() with format=`dv_up` and non-valid data. + + Parameters + ---------- + import_datafile_min_dict : dict + Fixture, which returns a flat dataset dict(). + + """ + data = import_datafile_min_dict + df = Datafile() + df.set(data) + df.pid = None + + assert not df.is_valid() + assert df.filename == 'tests/data/datafile.txt' + + def test_datafile_json_dv_up_valid(self, import_datafile_min_dict): + """Test Datafile.json() with format=`dv_up` and valid data. + + Parameters + ---------- + import_datafile_min_dict : dict + Fixture, which returns a flat dataset dict(). + + """ + data = import_datafile_min_dict + df = Datafile() + df.set(data) + data = df.json() + + assert data + assert isinstance(data, str) + + def test_datafile_json_dv_up_valid_not(self, import_datafile_min_dict): + """Test Datafile.json() with format=`dv_up` and non-valid data. + + Parameters + ---------- + import_datafile_min_dict : dict + Fixture, which returns a flat dataset dict(). + + """ + data = import_datafile_min_dict + df = Datafile() + df.set(data) + df.filename = None + + assert not df.is_valid() + print(df.json('dv_up')) + assert not df.json('dv_up') + + def test_datafile_json_all_valid(self, import_datafile_full_dict): + """Test Datafile.json() with format=`all` and valid data. + + Parameters + ---------- + import_datafile_full_dict : dict + Fixture, which returns a flat dataset dict(). + + """ + data = import_datafile_full_dict + df = Datafile() + df.set(data) + data = df.json('all') + + assert data + assert isinstance(data, str) + + def test_datafile_json_format_wrong_valid(self, import_datafile_min_dict): + """Test Datafile.json() with non-valid format and valid data. + + Parameters + ---------- + import_datafile_min_dict : dict + Fixture, which returns a flat dataset dict(). 
diff --git a/tests/test_models_dataset.py b/tests/test_models_dataset.py
new file mode 100644
index 0000000..6227450
--- /dev/null
+++ b/tests/test_models_dataset.py
@@ -0,0 +1,443 @@
+# !/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Dataset data model tests."""
+import os
+from pyDataverse.models import Dataset
+
+TEST_DIR = os.path.dirname(os.path.realpath(__file__))
+
+
+class TestDataset(object):
+    """Tests for Dataset()."""
+
+    def test_dataset_init(self):
+        """Test Dataset.__init__()."""
+        ds = Dataset()
+
+        assert isinstance(ds.datafiles, list)
+        assert len(ds.datafiles) == 0
+
+        # Metadata: dataset
+        assert not ds.license
+        assert not ds.termsOfUse
+        assert not ds.termsOfAccess
+
+        # Metadata: citation
+        assert not ds.citation_displayName
+        assert not ds.title
+        assert not ds.subtitle
+        assert not ds.alternativeTitle
+        assert not ds.alternativeURL
+        assert isinstance(ds.otherId, list)
+        assert len(ds.otherId) == 0
+        assert isinstance(ds.author, list)
+        assert len(ds.author) == 0
+        assert isinstance(ds.datasetContact, list)
+        assert len(ds.datasetContact) == 0
+        assert isinstance(ds.dsDescription, list)
+        assert len(ds.dsDescription) == 0
+        assert isinstance(ds.subject, list)
+        assert len(ds.subject) == 0
+        assert isinstance(ds.topicClassification, list)
+        assert len(ds.topicClassification) == 0
+        assert isinstance(ds.publication, list)
+        assert len(ds.publication) == 0
+        assert not ds.notesText
+        assert isinstance(ds.producer, list)
+        assert len(ds.producer) == 0
+        assert not ds.productionDate
+        assert not ds.productionPlace
+        assert isinstance(ds.contributor, list)
+        assert len(ds.contributor) == 0
+        assert isinstance(ds.grantNumber, list)
+        assert len(ds.grantNumber) == 0
+        assert isinstance(ds.distributor, list)
+        assert len(ds.distributor) == 0
+        assert not ds.distributionDate
+        assert not ds.depositor
+        assert not ds.dateOfDeposit
+        assert isinstance(ds.timePeriodCovered, list)
+        assert len(ds.timePeriodCovered) == 0
+        assert isinstance(ds.dateOfCollection, list)
+        assert len(ds.dateOfCollection) == 0
+        assert isinstance(ds.kindOfData, list)
+        assert len(ds.kindOfData) == 0
+        assert not ds.seriesName
+        assert not ds.seriesInformation
+        assert isinstance(ds.software, list)
+        assert len(ds.software) == 0
+        assert isinstance(ds.relatedMaterial, list)
+        assert len(ds.relatedMaterial) == 0
+        assert isinstance(ds.relatedDatasets, list)
+        assert len(ds.relatedDatasets) == 0
+        assert isinstance(ds.otherReferences, list)
+        assert len(ds.otherReferences) == 0
+        assert isinstance(ds.dataSources, list)
+        assert len(ds.dataSources) == 0
+        assert not ds.originOfSources
+        assert not ds.characteristicOfSources
+        assert not ds.accessToSources
+
+        # Metadata: geospatial
+        assert not ds.geospatial_displayName
+        assert isinstance(ds.geographicCoverage, list)
+        assert len(ds.geographicCoverage) == 0
+        assert not ds.geographicUnit
+        assert isinstance(ds.geographicBoundingBox, list)
+        assert len(ds.geographicBoundingBox) == 0
+
+        # Metadata: socialscience
+        assert not ds.socialscience_displayName
+        assert isinstance(ds.unitOfAnalysis, list)
+        assert len(ds.unitOfAnalysis) == 0
+        assert isinstance(ds.universe, list)
+        assert len(ds.universe) == 0
+        assert not ds.timeMethod
+        assert not ds.dataCollector
+        assert not ds.collectorTraining
+        assert not ds.frequencyOfDataCollection
+        assert not ds.samplingProcedure
+        assert not ds.targetSampleActualSize
+        assert not ds.targetSampleSizeFormula
+        assert not ds.socialScienceNotesType
+        assert not ds.socialScienceNotesSubject
+        assert not ds.socialScienceNotesText
+        assert not ds.deviationsFromSampleDesign
+        assert not ds.collectionMode
+        assert not ds.researchInstrument
+        assert not ds.dataCollectionSituation
+        assert not ds.actionsToMinimizeLoss
+        assert not ds.controlOperations
+        assert not ds.weighting
+        assert not ds.cleaningOperations
+        assert not ds.datasetLevelErrorNotes
+        assert not ds.responseRate
+        assert not ds.samplingErrorEstimates
+        assert not ds.otherDataAppraisal
+
+        # Metadata: journal
+        assert not ds.journal_displayName
+        assert isinstance(ds.journalVolumeIssue, list)
+        assert len(ds.journalVolumeIssue) == 0
+        assert not ds.journalArticleType
+
+    def test_dataset_set_dv_up(self, import_dataset_min_dict):
+        """Test Dataset.set() with format=`dv_up`.
+
+        Parameters
+        ----------
+        import_dataset_min_dict : dict
+            Fixture, which returns a flat dataset dict().
+
+        """
+        ds = Dataset()
+        data = import_dataset_min_dict
+        ds.set(data)
+
+        # dataset
+        assert ds.license == 'CC0'
+        assert ds.termsOfUse == 'CC0 Waiver'
+        assert ds.termsOfAccess == 'Terms of Access'
+
+        # citation
+        assert ds.citation_displayName == 'Citation Metadata'
+        assert ds.title == 'Replication Data for: Title'
+
+    def test_dataset_is_valid_valid(self):
+        """Test Dataset.is_valid() with valid data."""
+        ds = Dataset()
+        ds.import_metadata(TEST_DIR + '/data/dataset_full.json')
+
+        assert ds.is_valid()
+
+    def test_dataset_is_valid_valid_not(self):
+        """Test Dataset.is_valid() with non-valid data."""
+        ds = Dataset()
+        ds.import_metadata(TEST_DIR + '/data/dataset_full.json')
+        ds.title = None
+
+        assert not ds.is_valid()
+
+    def test_dataset_import_metadata_dv_up(self):
+        """Test Dataset.import_metadata() with format=`dv_up`."""
+        ds = Dataset()
+        ds.import_metadata(TEST_DIR + '/data/dataset_full.json')
+
+        # dataset
+        assert ds.license == 'CC0'
+        assert ds.termsOfUse == 'CC0 Waiver'
+        assert ds.termsOfAccess == 'Terms of Access'
+
+        # citation
+        assert ds.citation_displayName == 'Citation Metadata'
+        assert ds.title == 'Replication Data for: Title'
+        assert ds.subtitle == 'Subtitle'
+        assert ds.alternativeTitle == 'Alternative Title'
+        assert ds.alternativeURL == 'http://AlternativeURL.org'
+        assert isinstance(ds.otherId, list)
+        assert len(ds.otherId) == 1
+        for d in ds.otherId:
+            assert d['otherIdAgency'] in ['OtherIDAgency1']
+            assert d['otherIdValue'] in ['OtherIDIdentifier1']
+        assert isinstance(ds.author, list)
+        assert len(ds.author) == 1
+        for d in ds.author:
+            assert d['authorName'] in ['LastAuthor1, FirstAuthor1']
+            assert d['authorAffiliation'] in ['AuthorAffiliation1']
+            assert d['authorIdentifierScheme'] in ['ORCID']
+            assert d['authorIdentifier'] in ['AuthorIdentifier1']
+        assert isinstance(ds.datasetContact, list)
+        assert len(ds.datasetContact) == 1
+        for d in ds.datasetContact:
+            assert d['datasetContactName'] in ['LastContact1, FirstContact1']
+            assert d['datasetContactAffiliation'] in ['ContactAffiliation1']
+            assert d['datasetContactEmail'] in ['ContactEmail1@mailinator.com']
+        assert isinstance(ds.dsDescription, list)
+        assert len(ds.dsDescription) == 1
+        for d in ds.dsDescription:
+            assert d['dsDescriptionValue'] in ['DescriptionText2']
+            assert d['dsDescriptionDate'] in ['1000-02-02']
+        assert ds.subject == ['Agricultural Sciences',
+                              'Business and Management', 'Engineering', 'Law']
+        assert isinstance(ds.keyword, list)
+        assert len(ds.keyword) == 1
+        for d in ds.keyword:
+            assert d['keywordValue'] in ['KeywordTerm1']
+            assert d['keywordVocabulary'] in ['KeywordVocabulary1']
+            assert d['keywordVocabularyURI'] in ['http://KeywordVocabularyURL1.org']
+        assert isinstance(ds.topicClassification, list)
+        assert len(ds.topicClassification) == 1
+        for d in ds.topicClassification:
+            assert d['topicClassValue'] in ['Topic Class Value1']
+            assert d['topicClassVocab'] in ['Topic Classification Vocabulary']
+        assert isinstance(ds.publication, list)
+        assert len(ds.publication) == 1
+        for d in ds.publication:
+            assert d['publicationCitation'] in ['RelatedPublicationCitation1']
+            assert d['publicationIDType'] in ['ark']
+            assert d['publicationIDNumber'] in ['RelatedPublicationIDNumber1']
+            assert d['publicationURL'] in ['http://RelatedPublicationURL1.org']
+        assert ds.notesText == 'Notes1'
+        assert isinstance(ds.producer, list)
+        assert len(ds.producer) == 1
+        for d in ds.producer:
+            assert d['producerName'] in ['LastProducer1, FirstProducer1']
+            assert d['producerAffiliation'] in ['ProducerAffiliation1']
+            assert d['producerAbbreviation'] in ['ProducerAbbreviation1']
+            assert d['producerURL'] in ['http://ProducerURL1.org']
+            assert d['producerLogoURL'] in ['http://ProducerLogoURL1.org']
+        assert ds.productionDate == '1003-01-01'
+        assert ds.productionPlace == 'ProductionPlace'
+        assert isinstance(ds.contributor, list)
+        assert len(ds.contributor) == 1
+        for d in ds.contributor:
+            assert d['contributorType'] in ['Data Collector']
+            assert d['contributorName'] in ['LastContributor1, FirstContributor1']
+        assert isinstance(ds.grantNumber, list)
+        assert len(ds.grantNumber) == 1
+        for d in ds.grantNumber:
+            assert d['grantNumberAgency'] in ['GrantInformationGrantAgency1']
+            assert d['grantNumberValue'] in ['GrantInformationGrantNumber1']
+        assert isinstance(ds.distributor, list)
+        assert len(ds.distributor) == 1
+        for d in ds.distributor:
+            assert d['distributorName'] in ['LastDistributor1, FirstDistributor1']
+            assert d['distributorAffiliation'] in ['DistributorAffiliation1']
+            assert d['distributorAbbreviation'] in ['DistributorAbbreviation1']
+            assert d['distributorURL'] in ['http://DistributorURL1.org']
+            assert d['distributorLogoURL'] in ['http://DistributorLogoURL1.org']
+        assert ds.distributionDate == '1004-01-01'
+        assert ds.depositor == 'LastDepositor, FirstDepositor'
+        assert ds.dateOfDeposit == '1002-01-01'
+        assert isinstance(ds.timePeriodCovered, list)
+        assert len(ds.timePeriodCovered) == 1
+        for d in ds.timePeriodCovered:
+            assert d['timePeriodCoveredStart'] in ['1005-01-01']
+            assert d['timePeriodCoveredEnd'] in ['1005-01-02']
+        assert isinstance(ds.dateOfCollection, list)
+        assert len(ds.dateOfCollection) == 1
+        for d in ds.dateOfCollection:
+            assert d['dateOfCollectionStart'] in ['1006-01-01']
+            assert d['dateOfCollectionEnd'] in ['1006-01-01']
+        assert ds.kindOfData == ['KindOfData1', 'KindOfData2']
+        assert ds.seriesName == 'SeriesName'
+        assert ds.seriesInformation == 'SeriesInformation'
+        assert isinstance(ds.software, list)
+        assert len(ds.software) == 1
+        for d in ds.software:
+            assert d['softwareName'] in ['SoftwareName1']
+            assert d['softwareVersion'] in ['SoftwareVersion1']
+        assert ds.relatedMaterial == ['RelatedMaterial1', 'RelatedMaterial2']
+        assert ds.relatedDatasets == ['RelatedDatasets1', 'RelatedDatasets2']
+        assert ds.otherReferences == ['OtherReferences1', 'OtherReferences2']
+        assert ds.dataSources == ['DataSources1', 'DataSources2']
+        assert ds.originOfSources == 'OriginOfSources'
+        assert ds.characteristicOfSources == 'CharacteristicOfSourcesNoted'
+        assert ds.accessToSources == 'DocumentationAndAccessToSources'
+
+        # geospatial
+        assert ds.geospatial_displayName == 'Geospatial Metadata'
+        assert isinstance(ds.geographicCoverage, list)
+        assert len(ds.geographicCoverage) == 1
+        for d in ds.geographicCoverage:
+            assert d['country'] in ['Afghanistan']
+            assert d['state'] in ['GeographicCoverageStateProvince1']
+            assert d['city'] in ['GeographicCoverageCity1']
+            assert d['otherGeographicCoverage'] in ['GeographicCoverageOther1']
+        assert ds.geographicUnit == ['GeographicUnit1', 'GeographicUnit2']
+        assert isinstance(ds.geographicBoundingBox, list)
+        assert len(ds.geographicBoundingBox) == 1
+        for d in ds.geographicBoundingBox:
+            assert d['westLongitude'] in ['10']
+            assert d['eastLongitude'] in ['20']
+            assert d['northLongitude'] in ['30']
+            assert d['southLongitude'] in ['40']
+
+        # socialscience
+        assert ds.socialscience_displayName == 'Social Science and Humanities Metadata'
+        assert ds.unitOfAnalysis == ['UnitOfAnalysis1', 'UnitOfAnalysis2']
+        assert ds.universe == ['Universe1', 'Universe2']
+        assert ds.timeMethod == 'TimeMethod'
+        assert ds.dataCollector == 'LastDataCollector1, FirstDataCollector1'
+        assert ds.collectorTraining == 'CollectorTraining'
+        assert ds.frequencyOfDataCollection == 'Frequency'
+        assert ds.samplingProcedure == 'SamplingProcedure'
+        assert ds.targetSampleActualSize == '100'
+        assert ds.targetSampleSizeFormula == 'TargetSampleSizeFormula'
+        assert ds.deviationsFromSampleDesign == 'MajorDeviationsForSampleDesign'
+        assert ds.collectionMode == 'CollectionMode'
+        assert ds.researchInstrument == 'TypeOfResearchInstrument'
+        assert ds.dataCollectionSituation == 'CharacteristicsOfDataCollectionSituation'
+        assert ds.actionsToMinimizeLoss == 'ActionsToMinimizeLosses'
+        assert ds.controlOperations == 'ControlOperations'
+        assert ds.weighting == 'Weighting'
+        assert ds.cleaningOperations == 'CleaningOperations'
+        assert ds.datasetLevelErrorNotes == 'StudyLevelErrorNotes'
+        assert ds.responseRate == 'ResponseRate'
+        assert ds.samplingErrorEstimates == 'EstimatesOfSamplingError'
+        assert ds.otherDataAppraisal == 'OtherFormsOfDataAppraisal'
+        assert ds.socialScienceNotesType == 'NotesType'
+        assert ds.socialScienceNotesSubject == 'NotesSubject'
+        assert ds.socialScienceNotesText == 'NotesText'
+
+        # journal
+        assert ds.journal_displayName == 'Journal Metadata'
+        assert isinstance(ds.journalVolumeIssue, list)
+        assert len(ds.journalVolumeIssue) == 1
+        for d in ds.journalVolumeIssue:
+            assert d['journalVolume'] in ['JournalVolume1']
+            assert d['journalIssue'] in ['JournalIssue1']
+            assert d['journalPubDate'] in ['1008-01-01']
+        assert ds.journalArticleType == 'abstract'
+
+    def test_dataset_import_metadata_format_wrong(self):
+        """Test Dataset.import_metadata() with non-valid format."""
+        ds = Dataset()
+        ds.import_metadata(TEST_DIR + '/data/dataset_full.json', 'wrong')
+
+        assert isinstance(ds.datafiles, list)
+        assert len(ds.datafiles) == 0
+
+        # Metadata: dataset
+        assert not ds.license
+        assert not ds.termsOfUse
+        assert not ds.termsOfAccess
+
+        # Metadata: citation
+        assert not ds.citation_displayName
+        assert not ds.title
+        assert not ds.subtitle
+        assert not ds.alternativeTitle
+        assert not ds.alternativeURL
+        assert isinstance(ds.otherId, list)
+        assert len(ds.otherId) == 0
+        assert isinstance(ds.author, list)
+        assert len(ds.author) == 0
+        assert isinstance(ds.datasetContact, list)
+        assert len(ds.datasetContact) == 0
+        assert isinstance(ds.dsDescription, list)
+        assert len(ds.dsDescription) == 0
+        assert isinstance(ds.subject, list)
+        assert len(ds.subject) == 0
+        assert isinstance(ds.topicClassification, list)
+        assert len(ds.topicClassification) == 0
+        assert isinstance(ds.publication, list)
+        assert len(ds.publication) == 0
+        assert not ds.notesText
+        assert isinstance(ds.producer, list)
+        assert len(ds.producer) == 0
+        assert not ds.productionDate
+        assert not ds.productionPlace
+        assert isinstance(ds.contributor, list)
+        assert len(ds.contributor) == 0
+        assert isinstance(ds.grantNumber, list)
+        assert len(ds.grantNumber) == 0
+        assert isinstance(ds.distributor, list)
+        assert len(ds.distributor) == 0
+        assert not ds.distributionDate
+        assert not ds.depositor
+        assert not ds.dateOfDeposit
+        assert isinstance(ds.timePeriodCovered, list)
+        assert len(ds.timePeriodCovered) == 0
+        assert isinstance(ds.dateOfCollection, list)
+        assert len(ds.dateOfCollection) == 0
+        assert isinstance(ds.kindOfData, list)
+        assert len(ds.kindOfData) == 0
+        assert not ds.seriesName
+        assert not ds.seriesInformation
+        assert isinstance(ds.software, list)
+        assert len(ds.software) == 0
+        assert isinstance(ds.relatedMaterial, list)
+        assert len(ds.relatedMaterial) == 0
+        assert isinstance(ds.relatedDatasets, list)
+        assert len(ds.relatedDatasets) == 0
+        assert isinstance(ds.otherReferences, list)
+        assert len(ds.otherReferences) == 0
+        assert isinstance(ds.dataSources, list)
+        assert len(ds.dataSources) == 0
+        assert not ds.originOfSources
+        assert not ds.characteristicOfSources
+        assert not ds.accessToSources
+
+        # Metadata: geospatial
+        assert not ds.geospatial_displayName
+        assert isinstance(ds.geographicCoverage, list)
+        assert len(ds.geographicCoverage) == 0
+        assert not ds.geographicUnit
+        assert isinstance(ds.geographicBoundingBox, list)
+        assert len(ds.geographicBoundingBox) == 0
+
+        # Metadata: socialscience
+        assert not ds.socialscience_displayName
+        assert isinstance(ds.unitOfAnalysis, list)
+        assert len(ds.unitOfAnalysis) == 0
+        assert isinstance(ds.universe, list)
+        assert len(ds.universe) == 0
+        assert not ds.timeMethod
+        assert not ds.dataCollector
+        assert not ds.collectorTraining
+        assert not ds.frequencyOfDataCollection
+        assert not ds.samplingProcedure
+        assert not ds.targetSampleActualSize
+        assert not ds.targetSampleSizeFormula
+        assert not ds.socialScienceNotesType
+        assert not ds.socialScienceNotesSubject
+        assert not ds.socialScienceNotesText
+        assert not ds.deviationsFromSampleDesign
+        assert not ds.collectionMode
+        assert not ds.researchInstrument
+        assert not ds.dataCollectionSituation
+        assert not ds.actionsToMinimizeLoss
+        assert not ds.controlOperations
+        assert not ds.weighting
+        assert not ds.cleaningOperations
+        assert not ds.datasetLevelErrorNotes
+        assert not ds.responseRate
+        assert not ds.samplingErrorEstimates
+        assert not ds.otherDataAppraisal
+
+        # Metadata: journal
+        assert not ds.journal_displayName
+        assert isinstance(ds.journalVolumeIssue, list)
+        assert len(ds.journalVolumeIssue) == 0
+        assert not ds.journalArticleType
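Taken together, the dataset tests describe an import-then-validate workflow. A minimal usage sketch, grounded only in calls and values asserted above and assuming it runs from the repository root so the test data path resolves:

```python
# Minimal sketch of the Dataset workflow the tests above exercise.
from pyDataverse.models import Dataset

ds = Dataset()
ds.import_metadata('tests/data/dataset_full.json')  # format defaults to 'dv_up'

assert ds.is_valid()
print(ds.title)    # 'Replication Data for: Title'
print(ds.license)  # 'CC0'
```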
diff --git a/tests/test_models_dataverse.py b/tests/test_models_dataverse.py
new file mode 100644
index 0000000..ae3e90f
--- /dev/null
+++ b/tests/test_models_dataverse.py
@@ -0,0 +1,274 @@
+# !/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Dataverse data model tests."""
+import os
+from pyDataverse.models import Dataset
+from pyDataverse.models import Dataverse
+
+TEST_DIR = os.path.dirname(os.path.realpath(__file__))
+
+
+class TestDataverse(object):
+    """Tests for Dataverse()."""
+
+    def test_dataverse_init(self):
+        """Test Dataverse.__init__()."""
+        dv = Dataverse()
+
+        assert isinstance(dv.datasets, list)
+        assert len(dv.datasets) == 0
+        assert isinstance(dv.dataverses, list)
+        assert len(dv.dataverses) == 0
+        assert not dv.pid
+        assert not dv.name
+        assert not dv.alias
+        assert isinstance(dv.dataverseContacts, list)
+        assert len(dv.dataverseContacts) == 0
+        assert not dv.affiliation
+        assert not dv.description
+        assert not dv.dataverseType
+
+    def test_dataverse_set_dv_up(self, import_dataverse_min_dict):
+        """Test Dataverse.set() with format=`dv_up`.
+
+        Parameters
+        ----------
+        import_dataverse_min_dict : dict
+            Fixture, which returns a flat dataverse dict() coming from
+            `tests/data/dataverse_min.json`.
+
+        """
+        data = import_dataverse_min_dict
+        dv = Dataverse()
+        dv.set(data)
+
+        assert isinstance(dv.datasets, list)
+        assert not dv.datasets
+        assert isinstance(dv.dataverses, list)
+        assert not dv.dataverses
+        assert not dv.pid
+        assert dv.alias == 'test-pyDataverse'
+        assert dv.name == 'Test pyDataverse'
+        assert len(dv.dataverseContacts) == 1
+        assert dv.dataverseContacts[0]['contactEmail'] == 'info@aussda.at'
+
+    def test_dataverse_import_metadata_dv_up(self):
+        """Test Dataverse.import_metadata() with format=`dv_up`."""
+        dv = Dataverse()
+        dv.import_metadata(TEST_DIR + '/data/dataverse_min.json')
+
+        assert isinstance(dv.datasets, list)
+        assert not dv.datasets
+        assert isinstance(dv.dataverses, list)
+        assert not dv.dataverses
+        assert not dv.pid
+        assert dv.alias == 'test-pyDataverse'
+        assert dv.name == 'Test pyDataverse'
+        assert isinstance(dv.dataverseContacts, list)
+        assert len(dv.dataverseContacts) == 1
+        assert dv.dataverseContacts[0]['contactEmail'] == 'info@aussda.at'
+
+    def test_dataverse_import_metadata_format_wrong(self):
+        """Test Dataverse.import_metadata() with non-valid format."""
+        dv = Dataverse()
+        dv.import_metadata(TEST_DIR + '/data/dataverse_min.json', 'wrong')
+
+        assert isinstance(dv.datasets, list)
+        assert len(dv.datasets) == 0
+        assert not dv.datasets
+        assert isinstance(dv.dataverses, list)
+        assert len(dv.dataverses) == 0
+        assert not dv.dataverses
+        assert not dv.pid
+        assert not dv.name
+        assert not dv.alias
+        assert isinstance(dv.dataverseContacts, list)
+        assert len(dv.dataverseContacts) == 0
+        assert not dv.dataverseContacts
+        assert not dv.affiliation
+        assert not dv.description
+        assert not dv.dataverseType
+
+    def test_dataverse_is_valid_valid(self, import_dataverse_min_dict):
+        """Test Dataverse.is_valid() with valid data.
+
+        Parameters
+        ----------
+        import_dataverse_min_dict : dict
+            Fixture, which returns a flat dataverse dict() coming from
+            `tests/data/dataverse_min.json`.
+
+        """
+        data = import_dataverse_min_dict
+        dv = Dataverse()
+        dv.set(data)
+
+        assert dv.is_valid()
+
+    def test_dataverse_is_valid_not(self, import_dataverse_min_dict):
+        """Test Dataverse.is_valid() with non-valid data.
+
+        Parameters
+        ----------
+        import_dataverse_min_dict : dict
+            Fixture, which returns a flat dataverse dict() coming from
+            `tests/data/dataverse_min.json`.
+
+        """
+        data = import_dataverse_min_dict
+        dv = Dataverse()
+        dv.set(data)
+        dv.name = None
+
+        assert not dv.name
+        assert not dv.is_valid()
+
+    def test_dataverse_dict_dv_up_valid(self, import_dataverse_min_dict):
+        """Test Dataverse.dict() with format=`dv_up` and valid data.
+
+        Parameters
+        ----------
+        import_dataverse_min_dict : dict
+            Fixture, which returns a flat dataverse dict() coming from
+            `tests/data/dataverse_min.json`.
+
+        """
+        data = import_dataverse_min_dict
+        dv = Dataverse()
+        dv.set(data)
+
+        assert dv.dict()
+        assert isinstance(dv.dict(), dict)
+
+    def test_dataverse_dict_all_valid(self, import_dataverse_min_dict):
+        """Test Dataverse.dict() with format=`all` and valid data.
+
+        Parameters
+        ----------
+        import_dataverse_min_dict : dict
+            Fixture, which returns a flat dataverse dict() coming from
+            `tests/data/dataverse_min.json`.
+
+        """
+        data = import_dataverse_min_dict
+        dv = Dataverse()
+        dv.set(data)
+        dv.datasets = [Dataset()]
+        dv.dataverses = [Dataverse()]
+        dv.pid = 'doi:10.11587/EVMUHP'
+        data = dv.dict('all')
+
+        assert data
+        assert isinstance(data, dict)
+        assert data['alias'] == 'test-pyDataverse'
+        assert data['name'] == 'Test pyDataverse'
+        assert data['dataverseContacts'][0]['contactEmail'] == 'info@aussda.at'
+        assert data['pid'] == 'doi:10.11587/EVMUHP'
+
+    def test_dataverse_dict_format_wrong(self, import_dataverse_min_dict):
+        """Test Dataverse.dict() with non-valid format.
+
+        Parameters
+        ----------
+        import_dataverse_min_dict : dict
+            Fixture, which returns a flat dataverse dict() coming from
+            `tests/data/dataverse_min.json`.
+
+        """
+        data = import_dataverse_min_dict
+        dv = Dataverse()
+        dv.set(data)
+
+        assert not dv.dict('wrong')
+
+    def test_dataverse_dict_dv_up_valid_not(self, import_dataverse_min_dict):
+        """Test Dataverse.dict() with format=`dv_up` and non-valid data.
+
+        Parameters
+        ----------
+        import_dataverse_min_dict : dict
+            Fixture, which returns a flat dataverse dict() coming from
+            `tests/data/dataverse_min.json`.
+
+        """
+        data = import_dataverse_min_dict
+        dv = Dataverse()
+        dv.set(data)
+        dv.name = None
+
+        assert not dv.dict()
+
+    def test_dataverse_json_dv_up_valid(self, import_dataverse_min_dict):
+        """Test Dataverse.json() with format=`dv_up` and valid data.
+
+        Parameters
+        ----------
+        import_dataverse_min_dict : dict
+            Fixture, which returns a flat dataverse dict() coming from
+            `tests/data/dataverse_min.json`.
+
+        """
+        data = import_dataverse_min_dict
+        dv = Dataverse()
+        dv.set(data)
+
+        assert dv.json()
+        assert isinstance(dv.json(), str)
+
+    def test_dataverse_json_dv_up_valid_not(self, import_dataverse_min_dict):
+        """Test Dataverse.json() with format=`dv_up` and non-valid data.
+
+        Parameters
+        ----------
+        import_dataverse_min_dict : dict
+            Fixture, which returns a flat dataverse dict() coming from
+            `tests/data/dataverse_min.json`.
+
+        """
+        data = import_dataverse_min_dict
+        dv = Dataverse()
+        dv.set(data)
+        dv.name = None
+
+        assert not dv.json()
+
+    def test_dataverse_json_all_valid(self, import_dataverse_min_dict):
+        """Test Dataverse.json() with format=`all` and valid data.
+
+        Parameters
+        ----------
+        import_dataverse_min_dict : dict
+            Fixture, which returns a flat dataverse dict() coming from
+            `tests/data/dataverse_min.json`.
+
+        """
+        data = import_dataverse_min_dict
+        dv = Dataverse()
+        dv.set(data)
+        dv.datasets = [Dataset()]
+        dv.dataverses = [Dataverse()]
+        dv.pid = 'doi:10.11587/EVMUHP'
+        data = dv.json('all')
+
+        assert data
+        assert isinstance(data, str)
+
+    def test_dataverse_json_format_wrong_valid(self, import_dataverse_min_dict):
+        """Test Dataverse.json() with non-valid format and valid data.
+
+        Parameters
+        ----------
+        import_dataverse_min_dict : dict
+            Fixture, which returns a flat dataverse dict() coming from
+            `tests/data/dataverse_min.json`.
+
+        """
+        data = import_dataverse_min_dict
+        dv = Dataverse()
+        dv.set(data)
+        dv.datasets = [Dataset()]
+        dv.dataverses = [Dataverse()]
+        dv.pid = 'doi:10.11587/EVMUHP'
+        data = dv.json('wrong')
+
+        assert not data
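The dataverse tests additionally show the containment model: a Dataverse holds child Dataset and Dataverse objects next to its own metadata. A sketch built only from values asserted above:

```python
# Sketch of the Dataverse tree model implied by the tests above.
from pyDataverse.models import Dataset, Dataverse

dv = Dataverse()
dv.set({
    'alias': 'test-pyDataverse',
    'name': 'Test pyDataverse',
    'dataverseContacts': [{'contactEmail': 'info@aussda.at'}],
})
dv.datasets = [Dataset()]      # child datasets
dv.dataverses = [Dataverse()]  # child dataverses
dv.pid = 'doi:10.11587/EVMUHP'

assert dv.is_valid()
print(dv.json('all'))  # serialize the full attribute set, not just upload metadata
```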
diff --git a/tox.ini b/tox.ini
index 6992f39..74e1b7f 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py36,coverage,coveralls,docs,flake8,packaging,dist
+envlist = py35,coverage,coveralls,docs,packaging,dist
 skip_missing_interpreters = True
 ignore_basepython_conflict = True
 
@@ -8,53 +8,55 @@
 description = default settings for unspecified tests
 usedevelop = False
 skip_install = False
 passenv = *
-basepython = python3.6
+basepython = python3.5
 
 [testenv:py27]
 deps =
     -r{toxinidir}/tools/tests-requirements.txt
 commands =
-    pytest tests/ --cov=src/pyDataverse --basetemp={envtmpdir}
+    pytest tests/ --cov=pyDataverse --basetemp={envtmpdir}
 
 [testenv:py34]
 deps =
     -r{toxinidir}/tools/tests-requirements.txt
 commands =
-    pytest tests/ --cov=src/pyDataverse --basetemp={envtmpdir}
+    pytest tests/ --cov=pyDataverse --basetemp={envtmpdir}
 
 [testenv:py35]
 deps =
     -r{toxinidir}/tools/tests-requirements.txt
 commands =
-    pytest tests/ --cov=src/pyDataverse --basetemp={envtmpdir}
+    pytest tests/ --cov=pyDataverse --basetemp={envtmpdir}
 
 [testenv:py36]
 deps =
     -r{toxinidir}/tools/tests-requirements.txt
 commands =
-    pytest tests/ --cov=src/pyDataverse --basetemp={envtmpdir}
+    pytest tests/ --cov=pyDataverse --basetemp={envtmpdir}
 
 [testenv:py37]
 deps =
     -r{toxinidir}/tools/tests-requirements.txt
 commands =
-    pytest tests/ --cov=src/pyDataverse --basetemp={envtmpdir}
+    pytest tests/ --cov=pyDataverse --basetemp={envtmpdir}
 
 [testenv:coverage]
 description = create report for coverage
 deps =
     -r{toxinidir}/tools/tests-requirements.txt
 commands =
-    pytest tests/ --cov=src/pyDataverse --cov-report=term-missing --cov-report=xml --cov-report=html
+    pytest tests/ --cov=pyDataverse --cov-report=term-missing --cov-report=xml --cov-report=html
 
 [testenv:coveralls]
 description = create reports for coveralls
 deps =
     -r{toxinidir}/tools/tests-requirements.txt
 commands =
-    pytest tests/ --doctest-modules -v --cov=src/pyDataverse
+    pytest tests/ --doctest-modules -v --cov=pyDataverse
 
 [flake8]
+max-line-length = 80
+ignore = E129
 exclude =
     src/pyDataverse/docs/source/conf.py
     .tox
 
@@ -100,3 +102,13 @@
 recreate = True
 commands =
     pip install --index-url https://test.pypi.org/simple/ pyDataverse
     pip freeze
+    python -c "import pyDataverse; print(pyDataverse.__name__); print(pyDataverse.__version__)"
+
+[testenv:pypi]
+description = install from pypi
+skip_install = True
+recreate = True
+commands =
+    pip install pyDataverse
+    pip freeze
+    python -c "import pyDataverse; print(pyDataverse.__name__); print(pyDataverse.__version__)"