From 7aa95a4ea6ad065dac6342b6e196e209ec670b48 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Tue, 28 May 2019 20:28:14 +0200 Subject: [PATCH 01/46] add release 0.1.1 notes to history.md --- HISTORY.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 01e1e07..429c989 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,11 @@ .. :changelog: +0.1.1 - (2019-05-28) +------------------------------------ + +[Release](https://github.com/AUSSDA/pyDataverse/releases/tag/v0.1.1) + 0.1.0 - Marietta Blau (2019-05-20) ------------------------------------ -- First release on PyPI. +[Release](https://github.com/AUSSDA/pyDataverse/releases/tag/v0.1.0) From b2fd32fc14dbb3f68cafbd1b6e06196972514699 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Tue, 28 May 2019 20:44:40 +0200 Subject: [PATCH 02/46] change function names of get, post and delete requests --- src/pyDataverse/api.py | 50 +++++++++++++++++++++--------------------- tests/test_api.py | 6 ++--- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index 5988dd7..17cb687 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -117,7 +117,7 @@ def __str__(self): """ return 'pyDataverse API class' - def make_get_request(self, query_str, params=None, auth=False): + def get_request(self, query_str, params=None, auth=False): """Make a GET request. Parameters @@ -146,7 +146,7 @@ def make_get_request(self, query_str, params=None, auth=False): else: ApiAuthorizationError( 'ERROR: GET - Api token not passed to ' - '`make_get_request` {}.'.format(url) + '`get_request` {}.'.format(url) ) try: @@ -174,8 +174,8 @@ def make_get_request(self, query_str, params=None, auth=False): ''.format(url) ) - def make_post_request(self, query_str, metadata=None, auth=False, - params=None): + def post_request(self, query_str, metadata=None, auth=False, + params=None): """Make a POST request. Parameters @@ -206,7 +206,7 @@ def make_post_request(self, query_str, metadata=None, auth=False, else: ApiAuthorizationError( 'ERROR: POST - Api token not passed to ' - '`make_post_request` {}.'.format(url) + '`post_request` {}.'.format(url) ) try: @@ -228,7 +228,7 @@ def make_post_request(self, query_str, metadata=None, auth=False, ''.format(url) ) - def make_delete_request(self, query_str, auth=False, params=None): + def delete_request(self, query_str, auth=False, params=None): """Make a DELETE request. 
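A minimal, illustrative use of the renamed request helpers (the `Api` constructor call and the base URL below are assumptions, not shown in this patch):

.. code-block:: python

    from pyDataverse.api import Api

    # assumed constructor signature: Api(base_url, api_token=None)
    api = Api('https://demo.dataverse.org')
    # the query string is appended to the native API base URL
    resp = api.get_request('/info/server')
    print(resp.status_code)
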
Parameters @@ -257,7 +257,7 @@ def make_delete_request(self, query_str, auth=False, params=None): else: ApiAuthorizationError( 'ERROR: DELETE - Api token not passed to ' - '`make_delete_request` {}.'.format(url) + '`delete_request` {}.'.format(url) ) try: @@ -294,7 +294,7 @@ def get_dataverse(self, identifier, auth=False): """ query_str = '/dataverses/{0}'.format(identifier) - resp = self.make_get_request(query_str, auth=auth) + resp = self.get_request(query_str, auth=auth) return resp def create_dataverse(self, identifier, metadata, auth=True, @@ -343,7 +343,7 @@ def create_dataverse(self, identifier, metadata, auth=True, ) query_str = '/dataverses/{0}'.format(parent) - resp = self.make_post_request(query_str, metadata, auth) + resp = self.post_request(query_str, metadata, auth) if resp.status_code == 404: error_msg = resp.json()['message'] @@ -386,7 +386,7 @@ def publish_dataverse(self, identifier, auth=True): """ query_str = '/dataverses/{0}/actions/:publish'.format(identifier) - resp = self.make_post_request(query_str, auth=auth) + resp = self.post_request(query_str, auth=auth) if resp.status_code == 401: error_msg = resp.json()['message'] @@ -433,7 +433,7 @@ def delete_dataverse(self, identifier, auth=True): """ query_str = '/dataverses/{0}'.format(identifier) - resp = self.make_delete_request(query_str, auth) + resp = self.delete_request(query_str, auth) if resp.status_code == 401: error_msg = resp.json()['message'] @@ -492,7 +492,7 @@ def get_dataset(self, identifier, auth=True, is_doi=True): identifier) else: query_str = '/datasets/{0}'.format(identifier) - resp = self.make_get_request(query_str, auth=auth) + resp = self.get_request(query_str, auth=auth) return resp def get_dataset_export(self, identifier, export_format): @@ -520,7 +520,7 @@ def get_dataset_export(self, identifier, export_format): """ query_str = '/datasets/export?exporter={0}&persistentId={1}'.format( export_format, identifier) - resp = self.make_get_request(query_str) + resp = self.get_request(query_str) return resp def create_dataset(self, dataverse, metadata, auth=True): @@ -562,7 +562,7 @@ def create_dataset(self, dataverse, metadata, auth=True): """ query_str = '/dataverses/{0}/datasets'.format(dataverse) - resp = self.make_post_request(query_str, metadata, auth) + resp = self.post_request(query_str, metadata, auth) if resp.status_code == 404: error_msg = resp.json()['message'] @@ -627,7 +627,7 @@ def publish_dataset(self, identifier, type='minor', auth=True): """ query_str = '/datasets/:persistentId/actions/:publish' query_str += '?persistentId={0}&type={1}'.format(identifier, type) - resp = self.make_post_request(query_str, auth=auth) + resp = self.post_request(query_str, auth=auth) if resp.status_code == 404: error_msg = resp.json()['message'] @@ -665,7 +665,7 @@ def delete_dataset(self, identifier, auth=True): """ query_str = '/datasets/:persistentId/?persistentId={0}'.format( identifier) - resp = self.make_delete_request(query_str, auth=auth) + resp = self.delete_request(query_str, auth=auth) if resp.status_code == 404: error_msg = resp.json()['message'] @@ -712,7 +712,7 @@ def get_datafiles(self, doi, version='1'): """ base_str = '/datasets/:persistentId/versions/' query_str = base_str+'{0}/files?persistentId={1}'.format(version, doi) - resp = self.make_get_request(query_str) + resp = self.get_request(query_str) return resp def get_datafile(self, identifier): @@ -736,7 +736,7 @@ def get_datafile(self, identifier): """ query_str = '/access/datafile/{0}'.format(identifier) - resp = 
self.make_get_request(query_str) + resp = self.get_request(query_str) return resp def get_datafile_bundle(self, identifier): @@ -772,7 +772,7 @@ def get_datafile_bundle(self, identifier): """ query_str = '/access/datafile/bundle/{0}'.format(identifier) - data = self.make_get_request(query_str) + data = self.get_request(query_str) return data def upload_file(self, identifier, filename): @@ -806,7 +806,7 @@ def upload_file(self, identifier, filename): self.api_token) shell_command += ' -X POST {0} -F file=@{1}'.format( query_str, filename) - # TODO: is shell=True necessary? + # TODO(Shell): is shell=True necessary? result = sp.run(shell_command, shell=True, stdout=sp.PIPE) resp = json.loads(result.stdout) return resp @@ -826,7 +826,7 @@ def get_info_version(self): """ query_str = '/info/version' - resp = self.make_get_request(query_str) + resp = self.get_request(query_str) return resp def get_info_server(self): @@ -844,7 +844,7 @@ def get_info_server(self): """ query_str = '/info/server' - resp = self.make_get_request(query_str) + resp = self.get_request(query_str) return resp def get_info_apiTermsOfUse(self): @@ -862,7 +862,7 @@ def get_info_apiTermsOfUse(self): """ query_str = '/info/apiTermsOfUse' - resp = self.make_get_request(query_str) + resp = self.get_request(query_str) return resp def get_metadatablocks(self): @@ -879,7 +879,7 @@ def get_metadatablocks(self): """ query_str = '/metadatablocks' - resp = self.make_get_request(query_str) + resp = self.get_request(query_str) return resp def get_metadatablock(self, identifier): @@ -902,5 +902,5 @@ def get_metadatablock(self, identifier): """ query_str = '/metadatablocks/{0}'.format(identifier) - resp = self.make_get_request(query_str) + resp = self.get_request(query_str) return resp diff --git a/tests/test_api.py b/tests/test_api.py index 616356d..85da49c 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -87,11 +87,11 @@ def setup_class(cls): assert cls.api.api_token assert cls.api.base_url - def test_make_get_request(self): - """Test successfull `.make_get_request()` request.""" + def test_get_request(self): + """Test successfull `.get_request()` request.""" # TODO: test params und auth default query_str = '/info/server' - resp = self.api.make_get_request(query_str) + resp = self.api.get_request(query_str) sleep(SLEEP_TIME) assert self.api.status == 'OK' assert isinstance(resp, Response) From c046db063cf36843e9227fae2420193be4690229 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Tue, 28 May 2019 20:45:19 +0200 Subject: [PATCH 03/46] minor docs update --- README.md | 2 +- src/pyDataverse/docs/source/conf.py | 1 - src/pyDataverse/docs/source/developer.rst | 2 ++ 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 69d9159..32c414a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![Build Status](https://travis-ci.com/AUSSDA/pyDataverse.svg?branch=master)](https://travis-ci.com/AUSSDA/pyDataverse) [![Coverage Status](https://coveralls.io/repos/github/AUSSDA/pyDataverse/badge.svg)](https://coveralls.io/github/AUSSDA/pyDataverse) [![Documentation Status](https://readthedocs.org/projects/pydataverse/badge/?version=latest)](https://pydataverse.readthedocs.io/en/latest) [![GitHub](https://img.shields.io/github/license/aussda/pydataverse.svg)](https://opensource.org/licenses/MIT) +[![PyPI](https://img.shields.io/pypi/v/pyDataverse.svg)](https://pypi.org/project/pyDataverse/) [![Build 
Status](https://travis-ci.com/AUSSDA/pyDataverse.svg?branch=master)](https://travis-ci.com/AUSSDA/pyDataverse) [![Coverage Status](https://coveralls.io/repos/github/AUSSDA/pyDataverse/badge.svg)](https://coveralls.io/github/AUSSDA/pyDataverse) [![Documentation Status](https://readthedocs.org/projects/pydataverse/badge/?version=latest)](https://pydataverse.readthedocs.io/en/latest) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pydataverse.svg) [![GitHub](https://img.shields.io/github/license/aussda/pydataverse.svg)](https://opensource.org/licenses/MIT) # pyDataverse diff --git a/src/pyDataverse/docs/source/conf.py b/src/pyDataverse/docs/source/conf.py index ae1e187..3ba9937 100644 --- a/src/pyDataverse/docs/source/conf.py +++ b/src/pyDataverse/docs/source/conf.py @@ -83,7 +83,6 @@ # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' - # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for diff --git a/src/pyDataverse/docs/source/developer.rst b/src/pyDataverse/docs/source/developer.rst index b3fe509..05758ab 100644 --- a/src/pyDataverse/docs/source/developer.rst +++ b/src/pyDataverse/docs/source/developer.rst @@ -35,6 +35,8 @@ Install from the local git repository, with all it's dependencies: .. code-block:: shell + git clone git@github.com:AUSSDA/pyDataverse.git + cd pyDataverse virtualenv venv source venv/bin/activate pip install -r tools/tests-requirements.txt From 598924edd8285a795400e0752abb6fcc0b864c73 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Tue, 28 May 2019 20:46:10 +0200 Subject: [PATCH 04/46] update requirements to requests>=2.12.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 733d2f0..324ee16 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ def run_tests(self): INSTALL_REQUIREMENTS = [ # A string or list of strings specifying what other distributions need to # be installed when this one is. 
- 'requests' + 'requests>=2.12.0' ] SETUP_REQUIREMENTS = [ From 89b3ef47c23d7df980fff344d2d320861eb0cc25 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Tue, 28 May 2019 20:46:46 +0200 Subject: [PATCH 05/46] add pypi tests to tox.ini --- tox.ini | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tox.ini b/tox.ini index 6992f39..6c86f05 100644 --- a/tox.ini +++ b/tox.ini @@ -100,3 +100,13 @@ recreate = True commands = pip install --index-url https://test.pypi.org/simple/ pyDataverse pip freeze + python -c "import pyDataverse; print(pyDataverse.__name__); print(pyDataverse.__version__)" + +[testenv:pypi] +description = install from pypi +skip_install = True +recreate = True +commands = + pip install pyDataverse + pip freeze + python -c "import pyDataverse; print(pyDataverse.__name__); print(pyDataverse.__version__)" From e9243916ad0f79e204f3c4897ec78b84c33ca6b8 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Tue, 28 May 2019 21:16:47 +0200 Subject: [PATCH 06/46] add data/ to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 23174ea..3fcf35b 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ notes*.md stash*.* setup.sh .pypirc +data/ # Distribution / packaging build/ From adf5c8bfb773777569ccc0334411e7b0cf11a117 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Tue, 28 May 2019 23:53:26 +0200 Subject: [PATCH 07/46] init models.py: add __init__, import_data, export_data, set, json and dict methods --- src/pyDataverse/models.py | 189 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 src/pyDataverse/models.py diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py new file mode 100644 index 0000000..458283e --- /dev/null +++ b/src/pyDataverse/models.py @@ -0,0 +1,189 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +"""Find out more at https://github.com/AUSSDA/pyDataverse.""" +from __future__ import absolute_import +from pyDataverse.utils import dict_to_json +from pyDataverse.utils import json_to_dict +from pyDataverse.utils import read_file_json +from pyDataverse.utils import write_file_json + + +""" +Data-structure to work with data and metadata of Dataverses, Datasets and +Datafiles - coming from different sources. +""" + + +class Dataverse(object): + """Base class for the Dataverse model. + + * data + * dict: dict mit key value pairs übergeben, wo key exakt das attribut ist. + * optional: list: liste tuples (links key, rechts value) übergeben, wo key exakt das attribut ist. + * does: set metadata functions: dicts mit key-value pairs übergeben. die keys müssen wie die metadata attribute + + """ + + def __init__(self): + """Init a `Dataverse()` class.""" + self.name = None + self.alias = None + self.contactEmail = [] + self.affiliation = None + self.description = None + self.dataverseType = None + self.datasets = [] + + def __str__(self): + """Return name of Dataverse() class for users.""" + return 'pyDataverse Dataverse() model class.' + + def set(self, data): + """Set attributes.""" + if isinstance(data, list): + # TODO: prüfen, ob die struktur passt + data = dict(data) + elif not isinstance(data, dict): + # TODO: Exception raisen + print('Data was not passed in the correct data type. Dict() or ' + 'List() required.') + + for key, val in data.items(): + # TODO: prüfen, ob es sich immer um strings handelt bei den keys und values. 
+ if key == 'alias': + self.alias = val + elif key == 'name': + self.name = val + elif key == 'contactEmail': + # TODO: add oder overwrite?? + if isinstance(val, list): + for email in val: + self.contactEmail.append(email) + elif isinstance(val, str): + self.contactEmail.append(val) + else: + # TODO: Exception + print('contactEmail "{}" not a list or a string. Do not' + ' know what to do'.format(val)) + elif key == 'affiliation': + self.affiliation = val + elif key == 'description': + self.description = val + elif key == 'dataverseType': + self.dataverseType = val + else: + print('Key "{}" passed is not valid'.format(key)) + + @property + def dict(self): + """Get Dataverse metadata as dict for Dataverse API upload. + + TODO: Validate standard + + """ + data = {} + + # prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! + if self.alias: + data['alias'] = self.alias + if self.name: + data['name'] = self.name + if self.contactEmail: + data['dataverseContacts'] = [] + for email in self.contactEmail: + data['dataverseContacts'].append({'contactEmail': email}) + if self.affiliation: + data['affiliation'] = self.affiliation + if self.description: + data['description'] = self.description + if self.dataverseType: + data['dataverseType'] = self.dataverseType + + return data + + @property + def json(self): + """Get Dataverse metadata as json for Dataverse API upload. + + TODO: Validate standard + + Example: Default dataverse metadata json: + { + "name": "Scientific Research", + "alias": "science", + "dataverseContacts": [ + { + "contactEmail": "pi@example.edu" + }, + { + "contactEmail": "student@example.edu" + } + ], + "affiliation": "Scientific Research University", + "description": "We do all the science.", + "dataverseType": "LABORATORY" + } + + """ + return dict_to_json(self.dict) + + def import_data(self, filename, format): + """Import data from different sources. + + does: mappen der metadaten in die interne datenstruktur -> set() + + Example: Default dataverse metadata json: + { + "name": "Scientific Research", + "alias": "science", + "dataverseContacts": [ + { + "contactEmail": "pi@example.edu" + }, + { + "contactEmail": "student@example.edu" + } + ], + "affiliation": "Scientific Research University", + "description": "We do all the science.", + "dataverseType": "LABORATORY" + } + + filename: string + format: `dv_up`, `dv_down` + return: True + + """ + if format == 'dv_up' or format == 'dv_down': + data = read_file_json(filename) + # TODO: welche der variablen sind den required? wie soll damit umgegangen werden? + if 'name' in data: + self.name = data['name'] + if 'alias' in data: + self.alias = data['alias'] + if 'dataverseContacts' in data: + for contact in data['dataverseContacts']: + for key, val in contact.items(): + if key == 'contactEmail': + self.contactEmail.append(val) + if 'affiliation' in data: + self.affiliation = data['affiliation'] + if 'description' in data: + self.description = data['description'] + if 'dataverseType' in data: + self.dataverseType = data['dataverseType'] + else: + # TODO: Exception + print('Data-format not right') + + def export_data(self, filename, format): + """Export data to different file-formats. 
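A short, hypothetical sketch of the intended workflow with this model (the metadata values mirror the sample JSON above):

.. code-block:: python

    from pyDataverse.models import Dataverse

    dv = Dataverse()
    dv.set({
        'alias': 'science',
        'name': 'Scientific Research',
        'contactEmail': ['pi@example.edu', 'student@example.edu'],
        'description': 'We do all the science.'
    })
    # the json property renders dataverseContacts in the upload format
    print(dv.json)
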
+ + format: `dv_up` + + """ + if format == 'dv_up': + return write_file_json(filename, self.dict) + else: + # TODO: Exception + print('Data-format not right') From 80576dc18fd4d61530a5635d3160788dd33cbb58 Mon Sep 17 00:00:00 2001 From: Ajax23 Date: Wed, 29 May 2019 12:17:40 +0200 Subject: [PATCH 08/46] Add metadata editing --- src/pyDataverse/api.py | 161 +++++++++++++++++++++++++++++++++++++++ src/pyDataverse/utils.py | 2 +- 2 files changed, 162 insertions(+), 1 deletion(-) diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index 5988dd7..6d2bf1c 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -14,6 +14,7 @@ from requests import delete from requests import get from requests import post +from requests import put import subprocess as sp @@ -228,6 +229,60 @@ def make_post_request(self, query_str, metadata=None, auth=False, ''.format(url) ) + def make_put_request(self, query_str, metadata=None, auth=False, + params=None): + """Make a PUT request. + + Parameters + ---------- + query_str : string + Query string for the request. Will be concatenated to + `native_api_base_url`. + metadata : string + Metadata as a json-formatted string. Defaults to `None`. + auth : bool + Should an api token be sent in the request. Defaults to `False`. + params : dict + Dictionary of parameters to be passed with the request. + Defaults to `None`. + + Returns + ------- + requests.Response + Response object of requests library. + + """ + url = '{0}{1}'.format(self.native_api_base_url, query_str) + if auth: + if self.api_token: + if not params: + params = {} + params['key'] = self.api_token + else: + ApiAuthorizationError( + 'ERROR: PUT - Api token not passed to ' + '`make_post_request` {}.'.format(url) + ) + + try: + resp = put( + url, + data=metadata, + params=params + ) + if resp.status_code == 401: + error_msg = resp.json()['message'] + raise ApiAuthorizationError( + 'ERROR: PUT HTTP 401 - Authorization error {0}. MSG: {1}' + ''.format(url, error_msg) + ) + return resp + except ConnectionError: + raise ConnectionError( + 'ERROR: PUT - Could not establish connection to api {}.' + ''.format(url) + ) + def make_delete_request(self, query_str, auth=False, params=None): """Make a DELETE request. @@ -690,6 +745,112 @@ def delete_dataset(self, identifier, auth=True): print('Dataset {} deleted'.format(identifier)) return resp + def get_dataset_metadata(self, identifier, auth=True): + """Get the metadatablocks of the fiven dataset. + + resp.status_code: + 200: metadata updated + + Parameters + ---------- + identifier : string + Doi of the dataset. e.g. `doi:10.11587/8H3N93`. + auth : bool + Should an api token be sent in the request. Defaults to `False`. + + Returns + ------- + dictionary + Metadata of given dataset + + """ + resp = self.get_dataset(identifier,auth=auth) + return resp.json()["data"]["latestVersion"]["metadataBlocks"]["citation"] + + + def edit_dataset_metadata(self, identifier, metadata, is_replace=False, auth=True): + """Edit metadata of a given dataset. `Offical documentation + `_. + + .. code-block:: bash + + PUT http://$SERVER/api/datasets/editMetadata/$id --upload-file FILENAME + + Add data to dataset fields that are blank or accept multiple values with + the following + + + .. code-block:: bash + + curl -H "X-Dataverse-key: $API_TOKEN" -X PUT $SERVER_URL/api/datasets/:persistentId/editMetadata/?persistentId=$PID --upload-file dataset-add-metadata.json + + For these edits your JSON file need only include those dataset fields + which you would like to edit. 
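For instance, a minimal payload could be built in Python roughly like this (the field layout is an assumption modelled on the sample file referenced below, the DOI is only an example, and `api` is assumed to be an authenticated `Api` instance):

.. code-block:: python

    from pyDataverse.utils import dict_to_json

    # assumed minimal payload: only the fields to be changed are listed
    metadata = dict_to_json({
        'fields': [
            {'typeName': 'subtitle', 'value': 'An updated subtitle'}
        ]
    })
    resp = api.edit_dataset_metadata('doi:10.11587/8H3N93', metadata, auth=True)
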
A sample JSON file may be downloaded + here: `dataset-edit-metadata-sample.json + `_ + + As an example, one could first get and save the metadate of a dataset + + .. code-block:: + + data = api.get_dataset_metadata(DOI,auth=True) + utils.write_file_json(fileName,data) + + Make changes to the file and then update the metadata in dataverse + + .. code-block:: + + data = utils.dict_to_json(utils.read_file_json(fileName)) + resp = api.edit_dataset_metadata(DOI,data,is_replace=True,auth=True) + + + resp.status_code: + 200: metadata updated + + Parameters + ---------- + identifier : string + Doi of the dataset. e.g. `doi:10.11587/8H3N93`. + metadata : string + Metadata of the Dataset as a json-formatted string. + is_replace : bool + True to replace already existing metadata. + auth : bool + Should an api token be sent in the request. Defaults to `False`. + + Returns + ------- + requests.Response + Response object of requests library. + + """ + + query_str = '/datasets/:persistentId/editMetadata/?persistentId={0}'.format( + identifier) + params = {'replace': True} if is_replace else {} + #if is_replace: query_str += "&replace=true" + + resp = self.make_put_request(query_str, metadata, auth, params) + + if resp.status_code == 401: + error_msg = resp.json()['message'] + raise ApiAuthorizationError( + 'ERROR: HTTP 401 - Updating metadata unauthorized. MSG: ' + ''.format(error_msg) + ) + elif resp.status_code == 400: + if 'Error parsing' in resp.json()['message']: + print('Wrong passed data format.') + else: + print('You may not add data to a field that already has data ' + + 'and does not allow multiples. ' + + 'Use is_replace=true to replace existing data.') + elif resp.status_code == 200: + # time = resp.json()['data']['lastUpdateTime'] + print('Dataset updated')# - {}.'.format(time)) + return resp + def get_datafiles(self, doi, version='1'): """List metadata of all datafiles of a dataset. diff --git a/src/pyDataverse/utils.py b/src/pyDataverse/utils.py index 0f07f77..f553ad1 100644 --- a/src/pyDataverse/utils.py +++ b/src/pyDataverse/utils.py @@ -45,7 +45,7 @@ def dict_to_json(data): """ try: - return json.dumps(data, ensure_ascii=False, indent=2) + return json.dumps(data, ensure_ascii=True, indent=2) except Exception as e: raise e From 8fe461976be12a32c3a1ce662c4da8d914212753 Mon Sep 17 00:00:00 2001 From: Ajax23 Date: Wed, 29 May 2019 12:40:56 +0200 Subject: [PATCH 09/46] Changed request def names --- src/pyDataverse/api.py | 52 +++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index 6d2bf1c..994664a 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -118,7 +118,7 @@ def __str__(self): """ return 'pyDataverse API class' - def make_get_request(self, query_str, params=None, auth=False): + def get_request(self, query_str, params=None, auth=False): """Make a GET request. Parameters @@ -147,7 +147,7 @@ def make_get_request(self, query_str, params=None, auth=False): else: ApiAuthorizationError( 'ERROR: GET - Api token not passed to ' - '`make_get_request` {}.'.format(url) + '`get_request` {}.'.format(url) ) try: @@ -175,7 +175,7 @@ def make_get_request(self, query_str, params=None, auth=False): ''.format(url) ) - def make_post_request(self, query_str, metadata=None, auth=False, + def post_request(self, query_str, metadata=None, auth=False, params=None): """Make a POST request. 
@@ -207,7 +207,7 @@ def make_post_request(self, query_str, metadata=None, auth=False, else: ApiAuthorizationError( 'ERROR: POST - Api token not passed to ' - '`make_post_request` {}.'.format(url) + '`post_request` {}.'.format(url) ) try: @@ -229,7 +229,7 @@ def make_post_request(self, query_str, metadata=None, auth=False, ''.format(url) ) - def make_put_request(self, query_str, metadata=None, auth=False, + def put_request(self, query_str, metadata=None, auth=False, params=None): """Make a PUT request. @@ -261,7 +261,7 @@ def make_put_request(self, query_str, metadata=None, auth=False, else: ApiAuthorizationError( 'ERROR: PUT - Api token not passed to ' - '`make_post_request` {}.'.format(url) + '`put_request` {}.'.format(url) ) try: @@ -283,7 +283,7 @@ def make_put_request(self, query_str, metadata=None, auth=False, ''.format(url) ) - def make_delete_request(self, query_str, auth=False, params=None): + def delete_request(self, query_str, auth=False, params=None): """Make a DELETE request. Parameters @@ -312,7 +312,7 @@ def make_delete_request(self, query_str, auth=False, params=None): else: ApiAuthorizationError( 'ERROR: DELETE - Api token not passed to ' - '`make_delete_request` {}.'.format(url) + '`delete_request` {}.'.format(url) ) try: @@ -349,7 +349,7 @@ def get_dataverse(self, identifier, auth=False): """ query_str = '/dataverses/{0}'.format(identifier) - resp = self.make_get_request(query_str, auth=auth) + resp = self.get_request(query_str, auth=auth) return resp def create_dataverse(self, identifier, metadata, auth=True, @@ -398,7 +398,7 @@ def create_dataverse(self, identifier, metadata, auth=True, ) query_str = '/dataverses/{0}'.format(parent) - resp = self.make_post_request(query_str, metadata, auth) + resp = self.post_request(query_str, metadata, auth) if resp.status_code == 404: error_msg = resp.json()['message'] @@ -441,7 +441,7 @@ def publish_dataverse(self, identifier, auth=True): """ query_str = '/dataverses/{0}/actions/:publish'.format(identifier) - resp = self.make_post_request(query_str, auth=auth) + resp = self.post_request(query_str, auth=auth) if resp.status_code == 401: error_msg = resp.json()['message'] @@ -488,7 +488,7 @@ def delete_dataverse(self, identifier, auth=True): """ query_str = '/dataverses/{0}'.format(identifier) - resp = self.make_delete_request(query_str, auth) + resp = self.delete_request(query_str, auth) if resp.status_code == 401: error_msg = resp.json()['message'] @@ -547,7 +547,7 @@ def get_dataset(self, identifier, auth=True, is_doi=True): identifier) else: query_str = '/datasets/{0}'.format(identifier) - resp = self.make_get_request(query_str, auth=auth) + resp = self.get_request(query_str, auth=auth) return resp def get_dataset_export(self, identifier, export_format): @@ -575,7 +575,7 @@ def get_dataset_export(self, identifier, export_format): """ query_str = '/datasets/export?exporter={0}&persistentId={1}'.format( export_format, identifier) - resp = self.make_get_request(query_str) + resp = self.get_request(query_str) return resp def create_dataset(self, dataverse, metadata, auth=True): @@ -617,7 +617,7 @@ def create_dataset(self, dataverse, metadata, auth=True): """ query_str = '/dataverses/{0}/datasets'.format(dataverse) - resp = self.make_post_request(query_str, metadata, auth) + resp = self.post_request(query_str, metadata, auth) if resp.status_code == 404: error_msg = resp.json()['message'] @@ -682,7 +682,7 @@ def publish_dataset(self, identifier, type='minor', auth=True): """ query_str = '/datasets/:persistentId/actions/:publish' 
query_str += '?persistentId={0}&type={1}'.format(identifier, type) - resp = self.make_post_request(query_str, auth=auth) + resp = self.post_request(query_str, auth=auth) if resp.status_code == 404: error_msg = resp.json()['message'] @@ -720,7 +720,7 @@ def delete_dataset(self, identifier, auth=True): """ query_str = '/datasets/:persistentId/?persistentId={0}'.format( identifier) - resp = self.make_delete_request(query_str, auth=auth) + resp = self.delete_request(query_str, auth=auth) if resp.status_code == 404: error_msg = resp.json()['message'] @@ -831,7 +831,7 @@ def edit_dataset_metadata(self, identifier, metadata, is_replace=False, auth=Tru params = {'replace': True} if is_replace else {} #if is_replace: query_str += "&replace=true" - resp = self.make_put_request(query_str, metadata, auth, params) + resp = self.put_request(query_str, metadata, auth, params) if resp.status_code == 401: error_msg = resp.json()['message'] @@ -873,7 +873,7 @@ def get_datafiles(self, doi, version='1'): """ base_str = '/datasets/:persistentId/versions/' query_str = base_str+'{0}/files?persistentId={1}'.format(version, doi) - resp = self.make_get_request(query_str) + resp = self.get_request(query_str) return resp def get_datafile(self, identifier): @@ -897,7 +897,7 @@ def get_datafile(self, identifier): """ query_str = '/access/datafile/{0}'.format(identifier) - resp = self.make_get_request(query_str) + resp = self.get_request(query_str) return resp def get_datafile_bundle(self, identifier): @@ -933,7 +933,7 @@ def get_datafile_bundle(self, identifier): """ query_str = '/access/datafile/bundle/{0}'.format(identifier) - data = self.make_get_request(query_str) + data = self.get_request(query_str) return data def upload_file(self, identifier, filename): @@ -987,7 +987,7 @@ def get_info_version(self): """ query_str = '/info/version' - resp = self.make_get_request(query_str) + resp = self.get_request(query_str) return resp def get_info_server(self): @@ -1005,7 +1005,7 @@ def get_info_server(self): """ query_str = '/info/server' - resp = self.make_get_request(query_str) + resp = self.get_request(query_str) return resp def get_info_apiTermsOfUse(self): @@ -1023,7 +1023,7 @@ def get_info_apiTermsOfUse(self): """ query_str = '/info/apiTermsOfUse' - resp = self.make_get_request(query_str) + resp = self.get_request(query_str) return resp def get_metadatablocks(self): @@ -1040,7 +1040,7 @@ def get_metadatablocks(self): """ query_str = '/metadatablocks' - resp = self.make_get_request(query_str) + resp = self.get_request(query_str) return resp def get_metadatablock(self, identifier): @@ -1063,5 +1063,5 @@ def get_metadatablock(self, identifier): """ query_str = '/metadatablocks/{0}'.format(identifier) - resp = self.make_get_request(query_str) + resp = self.get_request(query_str) return resp From fd1d554ca75aa1cccbf3fb97d14bd20558245460 Mon Sep 17 00:00:00 2001 From: Ajax23 Date: Wed, 29 May 2019 12:44:20 +0200 Subject: [PATCH 10/46] Resolve conflict --- src/pyDataverse/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index 994664a..f9b264f 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -176,7 +176,7 @@ def get_request(self, query_str, params=None, auth=False): ) def post_request(self, query_str, metadata=None, auth=False, - params=None): + params=None): """Make a POST request. 
Parameters From 593a8f13dde3a815c84ae81ec3e54366c746f22f Mon Sep 17 00:00:00 2001 From: Ajax23 Date: Wed, 29 May 2019 12:45:25 +0200 Subject: [PATCH 11/46] Resolve conflict 2 --- src/pyDataverse/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index f9b264f..8b0be34 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -230,7 +230,7 @@ def post_request(self, query_str, metadata=None, auth=False, ) def put_request(self, query_str, metadata=None, auth=False, - params=None): + params=None): """Make a PUT request. Parameters @@ -283,6 +283,7 @@ def put_request(self, query_str, metadata=None, auth=False, ''.format(url) ) + def delete_request(self, query_str, auth=False, params=None): """Make a DELETE request. From 948f3de296bcf3bb2e32a7e256ab954609b2ce0a Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Wed, 29 May 2019 19:04:25 +0200 Subject: [PATCH 12/46] start with Dataset() data model --- src/pyDataverse/models.py | 339 +++++++++++++++++++++++++++++++++++++- 1 file changed, 337 insertions(+), 2 deletions(-) diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py index 458283e..992fd63 100644 --- a/src/pyDataverse/models.py +++ b/src/pyDataverse/models.py @@ -18,14 +18,14 @@ class Dataverse(object): """Base class for the Dataverse model. * data - * dict: dict mit key value pairs übergeben, wo key exakt das attribut ist. + * dict: dict mit key value pairs übergeben, wo key exakt das attributist. * optional: list: liste tuples (links key, rechts value) übergeben, wo key exakt das attribut ist. * does: set metadata functions: dicts mit key-value pairs übergeben. die keys müssen wie die metadata attribute """ def __init__(self): - """Init a `Dataverse()` class.""" + """Init `Dataverse()` class.""" self.name = None self.alias = None self.contactEmail = [] @@ -33,6 +33,7 @@ def __init__(self): self.description = None self.dataverseType = None self.datasets = [] + self.dataverses = [] def __str__(self): """Return name of Dataverse() class for users.""" @@ -153,6 +154,10 @@ def import_data(self, filename, format): format: `dv_up`, `dv_down` return: True + dv = Dataverse() + dv.import_data('data/dataverse/dataverse-complete.json', 'dv_up') + dv.contactEmail + """ if format == 'dv_up' or format == 'dv_down': data = read_file_json(filename) @@ -187,3 +192,333 @@ def export_data(self, filename, format): else: # TODO: Exception print('Data-format not right') + + +class Dataset(object): + """Base class for the Dataset model.""" + + def __init__(self): + """Init Dataset() class.""" + """Dataset""" + self.license = None + self.termsOfUse = None + self.termsOfAccess = None + + """Citation""" + self.citation_displayName = None + self.title = None + self.subtitle = None + self.alternativeTitle = None + self.alternativeURL = None + self.otherId = [] + # self.otherIdAgency + # self.otherIdValue + self.author = [] + # self.authorName + # self.authorAffiliation + # self.authorIdentifierScheme + # self.authorIdentifier + self.datasetContact = [] + # self.datasetContactName + # self.datasetContactAffiliation + # self.datasetContactEmail + self.dsDescription = [] + # self.dsDescriptionValue + # self.dsDescriptionDate + self.subject = [] + self.keyword = [] + # self.keywordValue + # self.keywordVocabulary + # self.keywordVocabularyURI + self.topicClassification = [] + # self.topicClassValue + # self.topicClassVocab + self.publication = [] + # self.publicationCitation + # self.publicationIDType + # 
self.publicationIDNumber + # self.publicationURL + self.notesText = None + self.producer = [] + # self.producerName + # self.producerAffiliation + # self.producerAbbreviation + # self.producerURL + # self.producerLogoURL + self.productionDate = None + self.productionPlace = None + self.contributor = [] + # self.contributorType + # self.contributorName + self.grantNumber = [] + # self.grantNumberAgency + # self.grantNumberValue + self.distributor = [] + # self.distributorName + # self.distributorAffiliation + # self.distributorAbbreviation + # self.distributorURL + # self.distributorLogoURL + self.distributionDate = None + self.depositor = None + self.dateOfDeposit = None + self.timePeriodCovered = [] + # self.timePeriodCoveredStart + # self.timePeriodCoveredEnd + self.dateOfCollection = [] + # self.dateOfCollectionStart + # self.dateOfCollectionEnd + self.kindOfData = [] + self.series = [] + # self.seriesName + # self.seriesInformation + self.software = [] + # self.softwareName + # self.softwareVersion + self.relatedMaterial = [] + self.relatedDatasets = [] + self.otherReferences = [] + self.dataSources = [] + self.originOfSources = None + self.characteristicOfSources = None + self.accessToSources = None + + """Geospatial""" + self.geospatial_displayName = None + self.geographicCoverage = [] + # self.country + # self.state + # self.city + # self.otherGeographicCoverage + self.geographicUnit = None + self.geographicBoundingBox = [] + # self.westLongitude + # self.eastLongitude + # self.northLongitude + # self.southLongitude + + """SocialScience""" + self.socialscience_displayName = None + self.unitOfAnalysis = [] + self.universe = [] + self.timeMethod = None + self.dataCollector = None + self.collectorTraining = None + self.frequencyOfDataCollection = None + self.samplingProcedure = None + self.targetSampleSize = [] + # self.targetSampleActualSize + # self.targetSampleSizeFormula + self.deviationsFromSampleDesign = None + self.collectionMode = None + self.researchInstrument = None + self.dataCollectionSituation = None + self.actionsToMinimizeLoss = None + self.controlOperations = None + self.weighting = None + self.cleaningOperations = None + self.datasetLevelErrorNotes = None + self.responseRate = None + self.samplingErrorEstimates = None + self.otherDataAppraisal = None + self.socialScienceNotesType = None + self.socialScienceNotesSubject = None + self.socialScienceNotesText = None + + """Journal""" + self.journal_displayName = None + self.journalVolumeIssue = [] + # self.journalVolume + # self.journalIssue + # self.journalPubDate + self.journalArticleType = None + + def __str__(self): + """Return name of Dataset() class for users.""" + return 'pyDataverse Dataset() model class.' + + @property + def dict(self): + """Get Dataset metadata as dict for Dataverse API upload. + + TODO: Validate standard + + """ + data = {} + data['datasetVersion'] = {} + data['datasetVersion']['metadataBlocks'] = {} + citation = {} + citation['fields'] = [] + geospatial = {} + socialscience = {} + journal = {} + + # TODO: prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! 
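# Purely illustrative usage sketch (values invented):
#
#     ds = Dataset()
#     ds.title = 'Youth in Austria 2005'
#     ds.subject = ['Social Sciences']
#     ds.depositor = 'Doe, Jane'
#     # the citation fields end up nested under
#     # datasetVersion -> metadataBlocks -> citation
#     upload = ds.dict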
+ + """Dataset""" + if self.license: + data['datasetVersion']['license'] = self.license + if self.termsOfUse: + data['datasetVersion']['termsOfUse'] = self.termsOfUse + if self.termsOfAccess: + data['datasetVersion']['termsOfAccess'] = self.termsOfAccess + + """Citation""" + if self.citation_displayName: + citation['displayName'] = self.citation_displayName + if self.title: + citation['fields'].append({'title': self.title}) + if self.subtitle: + citation['fields'].append({'subtitle': self.subtitle}) + if self.alternativeTitle: + citation['fields'].append({'alternativeTitle': self.alternativeTitle}) + if self.alternativeURL: + citation['fields'].append({'alternativeURL': self.alternativeURL}) + if self.otherId: + pass + if self.author: + pass + if self.datasetContact: + pass + if self.dsDescription: + pass + if self.subject: + citation['fields'].append({'subject': self.subject}) + if self.keyword: + pass + if self.topicClassification: + pass + if self.publication: + pass + if self.notesText: + citation['fields'].append({'notesText': self.notesText}) + if self.producer: + pass + if self.productionDate: + citation['fields'].append({'productionDate': self.productionDate}) + if self.productionPlace: + citation['fields'].append({'productionPlace': self.productionPlace}) + if self.contributor: + pass + if self.grantNumber: + pass + if self.distributor: + pass + if self.distributionDate: + citation['fields'].append({'distributionDate': self.distributionDate}) + if self.depositor: + citation['fields'].append({'depositor': self.depositor}) + if self.dateOfDeposit: + citation['fields'].append({'dateOfDeposit': self.dateOfDeposit}) + if self.timePeriodCovered: + pass + if self.dateOfCollection: + pass + if self.kindOfData: + citation['fields'].append({'kindOfData': self.kindOfData}) + if self.series: + pass + if self.software: + pass + if self.relatedMaterial: + citation['fields'].append({'relatedMaterial': self.relatedMaterial}) + if self.relatedDatasets: + citation['fields'].append( + {'relatedDatasets': self.relatedDatasets}) + if self.otherReferences: + citation['fields'].append({'otherReferences': self.otherReferences}) + if self.dataSources: + citation['fields'].append({'dataSources': self.dataSources}) + if self.originOfSources: + citation['fields'].append( + {'originOfSources': self.originOfSources}) + if self.characteristicOfSources: + citation['fields'].append( + {'characteristicOfSources': self.characteristicOfSources}) + if self.accessToSources: + citation['fields'].append({'accessToSources': self.accessToSources}) + + """Geospatial""" + if self.geospatial_displayName: + data['geospatial_displayName'] = self.geospatial_displayName + if self.geographicCoverage: + pass + if self.geographicUnit: + data['geographicUnit'] = self.geographicUnit + if self.geographicBoundingBox: + pass + + """SocialScience""" + if self.socialscience_displayName: + data['socialscience_displayName'] = self.socialscience_displayName + if self.unitOfAnalysis: + data['unitOfAnalysis'] = self.unitOfAnalysis + if self.universe: + data['universe'] = self.universe + if self.timeMethod: + data['timeMethod'] = self.timeMethod + if self.dataCollector: + data['dataCollector'] = self.dataCollector + if self.collectorTraining: + data['collectorTraining'] = self.collectorTraining + if self.frequencyOfDataCollection: + data['frequencyOfDataCollection'] = self.frequencyOfDataCollection + if self.samplingProcedure: + data['samplingProcedure'] = self.samplingProcedure + if self.targetSampleSize: + pass + if 
self.deviationsFromSampleDesign: + data['deviationsFromSampleDesign'] = self.deviationsFromSampleDesign + if self.collectionMode: + data['collectionMode'] = self.collectionMode + if self.researchInstrument: + data['researchInstrument'] = self.researchInstrument + if self.dataCollectionSituation: + data['dataCollectionSituation'] = self.dataCollectionSituation + if self.actionsToMinimizeLoss: + data['actionsToMinimizeLoss'] = self.actionsToMinimizeLoss + if self.controlOperations: + data['controlOperations'] = self.controlOperations + if self.weighting: + data['weighting'] = self.weighting + if self.cleaningOperations: + data['cleaningOperations'] = self.cleaningOperations + if self.datasetLevelErrorNotes: + data['datasetLevelErrorNotes'] = self.datasetLevelErrorNotes + if self.responseRate: + data['responseRate'] = self.responseRate + if self.samplingErrorEstimates: + data['samplingErrorEstimates'] = self.samplingErrorEstimates + if self.otherDataAppraisal: + data['otherDataAppraisal'] = self.otherDataAppraisal + if self.socialScienceNotesType: + data['socialScienceNotesType'] = self.socialScienceNotesType + if self.socialScienceNotesSubject: + data['socialScienceNotesSubject'] = self.socialScienceNotesSubject + if self.socialScienceNotesText: + data['socialScienceNotesText'] = self.socialScienceNotesText + + """Journal""" + if self.journal_displayName: + data['journal_displayName'] = self.journal_displayName + if self.journalVolumeIssue: + pass + if self.journalArticleType: + data['journalArticleType'] = self.journalArticleType + + data['datasetVersion']['metadataBlocks']['citation'] = citation + data['datasetVersion']['metadataBlocks'][''] = socialscience + data['datasetVersion']['metadataBlocks'][''] = geospatial + data['datasetVersion']['metadataBlocks'][''] = journal + + return data + + @property + def json(self): + """Get Dataset metadata as json for Dataverse API upload. + + TODO: Validate standard + TODO: Link to default json file + + """ + return dict_to_json(self.dict) From 873255dfdcb8b377636a611b76a6da8e85832ea2 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Tue, 4 Jun 2019 18:29:19 +0200 Subject: [PATCH 13/46] re-factor Dataverse() class; add is_valid() to Dataverse(); add export_metadata() to Dataverse(); add import_metadata and parse_dicts() to Dataset() --- src/pyDataverse/models.py | 539 +++++++++++++++++++++++++++----------- 1 file changed, 381 insertions(+), 158 deletions(-) diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py index 992fd63..f838064 100644 --- a/src/pyDataverse/models.py +++ b/src/pyDataverse/models.py @@ -23,6 +23,18 @@ class Dataverse(object): * does: set metadata functions: dicts mit key-value pairs übergeben. die keys müssen wie die metadata attribute """ + __attr_required = [ + 'alias', + 'name', + 'contactEmail' + ] + __attr_flat = [ + 'alias', + 'name', + 'affiliation', + 'description', + 'dataverseType' + ] def __init__(self): """Init `Dataverse()` class.""" @@ -35,78 +47,45 @@ def __init__(self): self.datasets = [] self.dataverses = [] + + def __str__(self): """Return name of Dataverse() class for users.""" return 'pyDataverse Dataverse() model class.' def set(self, data): - """Set attributes.""" - if isinstance(data, list): - # TODO: prüfen, ob die struktur passt - data = dict(data) - elif not isinstance(data, dict): - # TODO: Exception raisen - print('Data was not passed in the correct data type. 
Dict() or ' - 'List() required.') - - for key, val in data.items(): - # TODO: prüfen, ob es sich immer um strings handelt bei den keys und values. - if key == 'alias': - self.alias = val - elif key == 'name': - self.name = val - elif key == 'contactEmail': - # TODO: add oder overwrite?? - if isinstance(val, list): - for email in val: - self.contactEmail.append(email) - elif isinstance(val, str): - self.contactEmail.append(val) - else: - # TODO: Exception - print('contactEmail "{}" not a list or a string. Do not' - ' know what to do'.format(val)) - elif key == 'affiliation': - self.affiliation = val - elif key == 'description': - self.description = val - elif key == 'dataverseType': - self.dataverseType = val - else: - print('Key "{}" passed is not valid'.format(key)) - - @property - def dict(self): - """Get Dataverse metadata as dict for Dataverse API upload. + """Set attributes. - TODO: Validate standard + Takes a dict with Key-Value pairs containing dataverse metadata. + Keys: attribute name. named after dataverse up standard. + Value: attribute value. types must be compatible for dataverse up. """ - data = {} + for key, val in data.items(): + self.__setattr__(key, val) - # prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! - if self.alias: - data['alias'] = self.alias - if self.name: - data['name'] = self.name - if self.contactEmail: - data['dataverseContacts'] = [] - for email in self.contactEmail: - data['dataverseContacts'].append({'contactEmail': email}) - if self.affiliation: - data['affiliation'] = self.affiliation - if self.description: - data['description'] = self.description - if self.dataverseType: - data['dataverseType'] = self.dataverseType + def is_valid(self): + """Check if metadata stored in attributes is valid for dataverse api upload. - return data + name, alias and dataverseContact are required fields. dataverseContact + is stored as list of emails in contactEmail, so contactEmail can not be + none. + """ + is_valid = True + for attr in DS_ATTR_REQUIRED: + if not self.__getattribute__(attr): + is_valid = False + print('attribute \'{0}\' missing.'.format(attr)) + return is_valid + + def import_metadata(self, filename, format): + """Import data from different sources. - @property - def json(self): - """Get Dataverse metadata as json for Dataverse API upload. + It is allowed to import incomplete Dataverses, where required + attributes are missing. - TODO: Validate standard + Simmply parse in the data. No validation needed. This will be done + later before the export. Example: Default dataverse metadata json: { @@ -125,13 +104,70 @@ def json(self): "dataverseType": "LABORATORY" } + filename: string + format: `dv_up`, `dv_down` + """ - return dict_to_json(self.dict) + data = {} + if format == 'dv_up': + metadata = read_file_json(filename) + # get first level metadata and parse it automatically + for attr in DV_ATTR_FLAT: + data[attr] = metadata[attr] + + # get nested metadata and parse it manually + if 'dataverseContacts' in metadata: + data['contactEmail'] = [] + for contact in metadata['dataverseContacts']: + for key, val in contact.items(): + if key == 'contactEmail': + data['contactEmail'].append(val) + self.set(data) + elif format == 'dv_down': + metadata = read_file_json(filename) + self.set(data) + else: + # TODO: Exception + print('Data-format not right') - def import_data(self, filename, format): - """Import data from different sources. + @property + def dict(self): + """Get Dataverse metadata as dict for Dataverse API upload. 
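A possible round trip with the refactored Dataverse class, only as a sketch (the file names are placeholders):

.. code-block:: python

    from pyDataverse.models import Dataverse

    dv = Dataverse()
    dv.import_metadata('dataverse-complete.json', 'dv_up')
    if dv.is_valid():
        # writes the dict representation back out in Dataverse upload format
        dv.export_metadata('dataverse-export.json', 'dv_up')
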
+ + TODO: Validate standard - does: mappen der metadaten in die interne datenstruktur -> set() + """ + if self.is_valid(): + data = {} + """ + dv_attr_list contains all metadata related attributes, which are + mapped on the first level of the dataverse up metadata structure. + This should help to shorten code + """ + for attr in DV_ATTR_FLAT: + if self.__getattribute__(attr): + data[attr] = self.__getattribute__(attr) + else: + print('attr {0} not in data model.'.format(attr)) + + # prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! + if self.contactEmail: + data['dataverseContacts'] = [] + for email in self.contactEmail: + data['dataverseContacts'].append({'contactEmail': email}) + else: + print('Key contactEmail not in data model.') + + return data + else: + print('dict can not be created. Data is not valid') + return None + + @property + def json(self): + """Get Dataverse metadata as json for Dataverse API upload. + + TODO: Validate standard Example: Default dataverse metadata json: { @@ -150,38 +186,10 @@ def import_data(self, filename, format): "dataverseType": "LABORATORY" } - filename: string - format: `dv_up`, `dv_down` - return: True - - dv = Dataverse() - dv.import_data('data/dataverse/dataverse-complete.json', 'dv_up') - dv.contactEmail - """ - if format == 'dv_up' or format == 'dv_down': - data = read_file_json(filename) - # TODO: welche der variablen sind den required? wie soll damit umgegangen werden? - if 'name' in data: - self.name = data['name'] - if 'alias' in data: - self.alias = data['alias'] - if 'dataverseContacts' in data: - for contact in data['dataverseContacts']: - for key, val in contact.items(): - if key == 'contactEmail': - self.contactEmail.append(val) - if 'affiliation' in data: - self.affiliation = data['affiliation'] - if 'description' in data: - self.description = data['description'] - if 'dataverseType' in data: - self.dataverseType = data['dataverseType'] - else: - # TODO: Exception - print('Data-format not right') + return dict_to_json(self.dict) - def export_data(self, filename, format): + def export_metadata(self, filename, format): """Export data to different file-formats. 
format: `dv_up` @@ -191,90 +199,151 @@ def export_data(self, filename, format): return write_file_json(filename, self.dict) else: # TODO: Exception - print('Data-format not right') + print('Data-format not right.') class Dataset(object): """Base class for the Dataset model.""" + __attr_required = [ + 'displayName', + 'title', + 'author', + 'datasetContact', + 'dsDescription', + 'subject' + ] + + __attr_flat = [ + 'license', + 'termsOfUse', + 'termsOfAccess' + ] + __attr_citation_flat = [ + 'title', + 'subtitle', + 'alternativeTitle', + 'alternativeURL', + 'subject', + 'notesText', + 'productionDate', + 'productionPlace', + 'grantNumber', + 'distributor', + 'distributionDate', + 'depositor', + 'dateOfDeposit', + 'kindOfData', + 'relatedMaterial', + 'relatedDatasets', + 'otherReferences', + 'dataSources', + 'originOfSources', + 'characteristicOfSources', + 'accessToSources', + 'kindOfData', + 'subject' + ] + + __attr_citation_arrays = { + 'otherId': ['otherIdAgency', 'otherIdValue'], + 'author': ['authorName', 'authorAffiliation', 'authorIdentifierScheme', 'authorIdentifier'], + 'datasetContact': ['datasetContactName', 'datasetContactAffiliation', 'datasetContactEmail'], + 'dsDescription': ['dsDescriptionValue', 'dsDescriptionDate'], + 'keyword': ['keywordValue', 'keywordVocabulary', 'keywordVocabularyURI'], + 'producer': ['producerName', 'producerAffiliation', 'producerAbbreviation', 'producerURL', 'producerLogoURL'], + 'contributor': ['contributorType', 'contributorName'], + 'grantNumber': ['grantNumberAgency', 'grantNumberValue'], + 'topicClassification': ['topicClassValue', 'topicClassVocab'], + 'publication': ['publicationCitation', 'publicationIDType', 'publicationIDNumber', 'publicationURL'], + 'distributor': ['distributorName', 'distributorAffiliation', 'distributorAbbreviation', 'distributorURL', 'distributorLogoURL'], + 'timePeriodCovered': ['timePeriodCoveredStart', 'timePeriodCoveredEnd'], + 'dateOfCollection': ['dateOfCollectionStart', 'dateOfCollectionEnd'], + 'software': ['softwareName', 'softwareVersion'] + } + + __attr_geospatial_flat = [ + 'geographicUnit', + 'geographicBoundingBox' + ] + + __attr_geospatial_arrays = { + 'geographicCoverage': ['country', 'state', 'city', 'otherGeographicCoverage'], + 'geographicBoundingBox': ['westLongitude', 'eastLongitude', 'northLongitude', 'southLongitude'] + } + + __attr_socialscience_flat = [ + 'unitOfAnalysis', + 'universe', + 'timeMethod', + 'dataCollector', + 'collectorTraining', + 'frequencyOfDataCollection', + 'samplingProcedure', + 'deviationsFromSampleDesign', + 'collectionMode', + 'researchInstrument', + 'dataCollectionSituation', + 'actionsToMinimizeLoss', + 'controlOperations', + 'weighting', + 'cleaningOperations', + 'datasetLevelErrorNotes', + 'responseRate', + 'samplingErrorEstimates', + 'otherDataAppraisal', + ] + + __attr_socialscience_arrays = { + 'targetSampleSize': ['targetSampleActualSize', 'targetSampleSizeFormula'], + 'socialScienceNotes': ['socialScienceNotesType', 'socialScienceNotesSubject', 'socialScienceNotesText'] + } + + __attr_journal_flat = [ + 'journalVolumeIssue', + 'journalArticleType' + ] + + __attr_journal_arrays = { + 'journalVolumeIssue': ['journalVolume', 'journalIssue', 'journalPubDate'] + } + def __init__(self): """Init Dataset() class.""" - """Dataset""" + """dataset""" self.license = None self.termsOfUse = None self.termsOfAccess = None - """Citation""" + """citation""" self.citation_displayName = None self.title = None self.subtitle = None self.alternativeTitle = None self.alternativeURL = 
None self.otherId = [] - # self.otherIdAgency - # self.otherIdValue self.author = [] - # self.authorName - # self.authorAffiliation - # self.authorIdentifierScheme - # self.authorIdentifier self.datasetContact = [] - # self.datasetContactName - # self.datasetContactAffiliation - # self.datasetContactEmail self.dsDescription = [] - # self.dsDescriptionValue - # self.dsDescriptionDate self.subject = [] self.keyword = [] - # self.keywordValue - # self.keywordVocabulary - # self.keywordVocabularyURI self.topicClassification = [] - # self.topicClassValue - # self.topicClassVocab self.publication = [] - # self.publicationCitation - # self.publicationIDType - # self.publicationIDNumber - # self.publicationURL self.notesText = None self.producer = [] - # self.producerName - # self.producerAffiliation - # self.producerAbbreviation - # self.producerURL - # self.producerLogoURL self.productionDate = None self.productionPlace = None self.contributor = [] - # self.contributorType - # self.contributorName self.grantNumber = [] - # self.grantNumberAgency - # self.grantNumberValue self.distributor = [] - # self.distributorName - # self.distributorAffiliation - # self.distributorAbbreviation - # self.distributorURL - # self.distributorLogoURL self.distributionDate = None self.depositor = None self.dateOfDeposit = None self.timePeriodCovered = [] - # self.timePeriodCoveredStart - # self.timePeriodCoveredEnd self.dateOfCollection = [] - # self.dateOfCollectionStart - # self.dateOfCollectionEnd self.kindOfData = [] self.series = [] - # self.seriesName - # self.seriesInformation self.software = [] - # self.softwareName - # self.softwareVersion self.relatedMaterial = [] self.relatedDatasets = [] self.otherReferences = [] @@ -283,21 +352,13 @@ def __init__(self): self.characteristicOfSources = None self.accessToSources = None - """Geospatial""" + """geospatial""" self.geospatial_displayName = None self.geographicCoverage = [] - # self.country - # self.state - # self.city - # self.otherGeographicCoverage self.geographicUnit = None self.geographicBoundingBox = [] - # self.westLongitude - # self.eastLongitude - # self.northLongitude - # self.southLongitude - """SocialScience""" + """socialscience""" self.socialscience_displayName = None self.unitOfAnalysis = [] self.universe = [] @@ -307,8 +368,6 @@ def __init__(self): self.frequencyOfDataCollection = None self.samplingProcedure = None self.targetSampleSize = [] - # self.targetSampleActualSize - # self.targetSampleSizeFormula self.deviationsFromSampleDesign = None self.collectionMode = None self.researchInstrument = None @@ -325,18 +384,167 @@ def __init__(self): self.socialScienceNotesSubject = None self.socialScienceNotesText = None - """Journal""" + """journal""" self.journal_displayName = None self.journalVolumeIssue = [] - # self.journalVolume - # self.journalIssue - # self.journalPubDate self.journalArticleType = None def __str__(self): """Return name of Dataset() class for users.""" return 'pyDataverse Dataset() model class.' + def set(self, data): + """Set attributes. + + Takes a dict with Key-Value pairs containing dataverse metadata. + Keys: attribute name. named after dataverse up standard. + Value: attribute value. types must be compatible for dataverse up. + + """ + for key, val in data.items(): + self.__setattr__(key, val) + + def is_valid(self): + """Check if metadata stored in attributes is valid for dataverse api upload. + + required: ?? + TODO: Test out required fields or ask Harvard. 
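A sketch of supplying required citation fields through `set()` (all values are invented); `is_valid()` then flags any required attribute that is still empty:

.. code-block:: python

    ds = Dataset()
    ds.set({
        'title': 'Youth in Austria 2005',
        'author': [{'authorName': 'Doe, Jane', 'authorAffiliation': 'AUSSDA'}],
        'datasetContact': [{'datasetContactEmail': 'jane@example.edu'}],
        'dsDescription': [{'dsDescriptionValue': 'A short description.'}],
        'subject': ['Social Sciences']
    })
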
+ + """ + is_valid = True + for attr in self.__attr_required: + if not self.__getattribute__(attr): + is_valid = False + print('attribute \'{0}\' missing.'.format(attr)) + return is_valid + + def import_metadata(self, filename, format): + """Import metadata.""" + data = {} + if format == 'dv_up': + metadata = read_file_json(filename) + """dataset""" + # get first level metadata and parse it automatically + for key, val in metadata['datasetVersion'].items(): + if key in self.__attr_flat: + data[key] = val + + # get nested metadata and parse it manually + if 'dataverseContacts' in metadata: + data['contactEmail'] = [] + for contact in metadata['dataverseContacts']: + for key, val in contact.items(): + if key == 'contactEmail': + data['contactEmail'].append(val) + + """citation""" + if 'citation' in metadata['datasetVersion']['metadataBlocks']: + citation = metadata['datasetVersion']['metadataBlocks']['citation'] + if 'displayName' in citation: + data['citation_displayName'] = citation['displayName'] + + for field in citation['fields']: + if field['typeName'] in self.__attr_citation_flat: + data[field['typeName']] = field['value'] + + if field['typeName'] in self.__attr_citation_arrays: + data[field['typeName']] = self.__parse_dicts( + field['value'], + self.__attr_citation_arrays[field['typeName']]) + + if field['typeName'] == 'series': + if 'seriesName' in field['value']: + data['seriesName'] = field['value']['seriesName'] + if 'seriesInformation' in field['value']: + data['seriesInformation'] = field['value']['seriesInformation'] + else: + # TODO: Exception + print('citation not in json') + + """geospatial""" + if 'geospatial' in metadata['datasetVersion']['metadataBlocks']: + geospatial = metadata['datasetVersion']['metadataBlocks']['geospatial'] + if 'displayName' in geospatial: + self.__setattr__('geospatial_displayName', geospatial['displayName']) + + for field in geospatial['fields']: + if field['typeName'] in self.__attr_geospatial_flat: + data[field['typeName']] = field['value'] + + if field['typeName'] in self.__attr_geospatial_arrays: + data[field['typeName']] = self.__parse_dicts( + field['value'], + self.__attr_geospatial_arrays[field['typeName']]) + else: + # TODO: Exception + print('geospatial not in json') + + """socialscience""" + if 'socialscience' in metadata['datasetVersion']['metadataBlocks']: + socialscience = metadata['datasetVersion']['metadataBlocks']['socialscience'] + if 'displayName' in socialscience: + self.__setattr__('socialscience_displayName', socialscience['displayName']) + + for field in socialscience['fields']: + if field['typeName'] in self.__attr_socialscience_flat: + data[field['typeName']] = field['value'] + + if field['typeName'] in self.__attr_socialscience_arrays: + data[field['typeName']] = self.__parse_dicts( + field['value'], + self.__attr_socialscience_arrays[field['typeName']]) + else: + # TODO: Exception + print('socialscience not in json') + + """journal""" + if 'journal' in metadata['datasetVersion']['metadataBlocks']: + journal = metadata['datasetVersion']['metadataBlocks']['journal'] + if 'displayName' in journal: + self.__setattr__('journal_displayName', journal['displayName']) + + for field in journal['fields']: + if field['typeName'] in self.__attr_journal_flat: + data[field['typeName']] = field['value'] + + if field['typeName'] in self.__attr_journal_arrays: + data[field['typeName']] = self.__parse_dicts( + field['value'], + self.__attr_journal_arrays[field['typeName']]) + else: + # TODO: Exception + print('journal not in json') + + 
self.set(data) + elif format == 'dv_down': + metadata = read_file_json(filename) + self.set(data) + else: + # TODO: Exception + print('Data-format not right') + + def __parse_dicts(self, data, attr_list): + """Parse out list of dicts. + + data: list of dicts + attr_list: list of attributes to be parsed out. + + return: list of dicts + + """ + data_tmp = [] + + for d in data: + tmp_dict = {} + for key, val in d.items(): + if key in attr_list: + tmp_dict[key] = val['value'] + else: + print('Key \'{0}\' not in attribute list'.format(key)) + data_tmp.append(tmp_dict) + + return data_tmp + @property def dict(self): """Get Dataset metadata as dict for Dataverse API upload. @@ -356,6 +564,7 @@ def dict(self): # TODO: prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! """Dataset""" + if self.license: data['datasetVersion']['license'] = self.license if self.termsOfUse: @@ -364,6 +573,8 @@ def dict(self): data['datasetVersion']['termsOfAccess'] = self.termsOfAccess """Citation""" + + # Fields[] if self.citation_displayName: citation['displayName'] = self.citation_displayName if self.title: @@ -507,9 +718,9 @@ def dict(self): data['journalArticleType'] = self.journalArticleType data['datasetVersion']['metadataBlocks']['citation'] = citation - data['datasetVersion']['metadataBlocks'][''] = socialscience - data['datasetVersion']['metadataBlocks'][''] = geospatial - data['datasetVersion']['metadataBlocks'][''] = journal + data['datasetVersion']['metadataBlocks']['socialscience'] = socialscience + data['datasetVersion']['metadataBlocks']['geospatial'] = geospatial + data['datasetVersion']['metadataBlocks']['journal'] = journal return data @@ -522,3 +733,15 @@ def json(self): """ return dict_to_json(self.dict) + + def export_metadata(self, filename, format): + """Export data to different file-formats. + + format: `dv_up` + + """ + if format == 'dv_up': + return write_file_json(filename, self.dict) + else: + # TODO: Exception + print('Data-format not right.') From 4342aceb35a50e0a28573d23f1aa2d3fa9ad4056 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Tue, 4 Jun 2019 23:00:05 +0200 Subject: [PATCH 14/46] add export_metadata(), dict and json to Dataset() --- src/pyDataverse/models.py | 350 +++++++++++++++++++------------------- 1 file changed, 173 insertions(+), 177 deletions(-) diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py index f838064..5fa5737 100644 --- a/src/pyDataverse/models.py +++ b/src/pyDataverse/models.py @@ -23,6 +23,7 @@ class Dataverse(object): * does: set metadata functions: dicts mit key-value pairs übergeben. die keys müssen wie die metadata attribute """ + __attr_required = [ 'alias', 'name', @@ -47,8 +48,6 @@ def __init__(self): self.datasets = [] self.dataverses = [] - - def __str__(self): """Return name of Dataverse() class for users.""" return 'pyDataverse Dataverse() model class.' @@ -72,7 +71,7 @@ def is_valid(self): none. """ is_valid = True - for attr in DS_ATTR_REQUIRED: + for attr in self.__attr_required: if not self.__getattribute__(attr): is_valid = False print('attribute \'{0}\' missing.'.format(attr)) @@ -112,7 +111,7 @@ def import_metadata(self, filename, format): if format == 'dv_up': metadata = read_file_json(filename) # get first level metadata and parse it automatically - for attr in DV_ATTR_FLAT: + for attr in self.__attr_flat: data[attr] = metadata[attr] # get nested metadata and parse it manually @@ -144,7 +143,7 @@ def dict(self): mapped on the first level of the dataverse up metadata structure. 
This should help to shorten code """ - for attr in DV_ATTR_FLAT: + for attr in self.__attr_flat: if self.__getattribute__(attr): data[attr] = self.__getattribute__(attr) else: @@ -228,8 +227,6 @@ class Dataset(object): 'notesText', 'productionDate', 'productionPlace', - 'grantNumber', - 'distributor', 'distributionDate', 'depositor', 'dateOfDeposit', @@ -241,8 +238,7 @@ class Dataset(object): 'originOfSources', 'characteristicOfSources', 'accessToSources', - 'kindOfData', - 'subject' + 'kindOfData' ] __attr_citation_arrays = { @@ -263,8 +259,7 @@ class Dataset(object): } __attr_geospatial_flat = [ - 'geographicUnit', - 'geographicBoundingBox' + 'geographicUnit' ] __attr_geospatial_arrays = { @@ -294,13 +289,7 @@ class Dataset(object): 'otherDataAppraisal', ] - __attr_socialscience_arrays = { - 'targetSampleSize': ['targetSampleActualSize', 'targetSampleSizeFormula'], - 'socialScienceNotes': ['socialScienceNotesType', 'socialScienceNotesSubject', 'socialScienceNotesText'] - } - __attr_journal_flat = [ - 'journalVolumeIssue', 'journalArticleType' ] @@ -368,6 +357,7 @@ def __init__(self): self.frequencyOfDataCollection = None self.samplingProcedure = None self.targetSampleSize = [] + self.socialScienceNotes = [] self.deviationsFromSampleDesign = None self.collectionMode = None self.researchInstrument = None @@ -380,9 +370,6 @@ def __init__(self): self.responseRate = None self.samplingErrorEstimates = None self.otherDataAppraisal = None - self.socialScienceNotesType = None - self.socialScienceNotesSubject = None - self.socialScienceNotesText = None """journal""" self.journal_displayName = None @@ -412,10 +399,12 @@ def is_valid(self): """ is_valid = True + # check if all required attributes are set for attr in self.__attr_required: if not self.__getattribute__(attr): is_valid = False print('attribute \'{0}\' missing.'.format(attr)) + return is_valid def import_metadata(self, filename, format): @@ -454,9 +443,9 @@ def import_metadata(self, filename, format): if field['typeName'] == 'series': if 'seriesName' in field['value']: - data['seriesName'] = field['value']['seriesName'] + data['seriesName'] = field['value']['seriesName']['value'] if 'seriesInformation' in field['value']: - data['seriesInformation'] = field['value']['seriesInformation'] + data['seriesInformation'] = field['value']['seriesInformation']['value'] else: # TODO: Exception print('citation not in json') @@ -489,10 +478,19 @@ def import_metadata(self, filename, format): if field['typeName'] in self.__attr_socialscience_flat: data[field['typeName']] = field['value'] - if field['typeName'] in self.__attr_socialscience_arrays: - data[field['typeName']] = self.__parse_dicts( - field['value'], - self.__attr_socialscience_arrays[field['typeName']]) + if field['typeName'] == 'targetSampleSize': + if 'targetSampleActualSize' in field['value']: + data['targetSampleActualSize'] = field['value']['targetSampleActualSize']['value'] + if 'targetSampleSizeFormula' in field['value']: + data['targetSampleSizeFormula'] = field['value']['targetSampleSizeFormula']['value'] + + if field['typeName'] == 'socialScienceNotes': + if 'socialScienceNotesType' in field['value']: + data['socialScienceNotesType'] = field['value']['socialScienceNotesType']['value'] + if 'socialScienceNotesSubject' in field['value']: + data['socialScienceNotesSubject'] = field['value']['socialScienceNotesSubject']['value'] + if 'socialScienceNotesText' in field['value']: + data['socialScienceNotesText'] = field['value']['socialScienceNotesText']['value'] else: # TODO: Exception 
print('socialscience not in json') @@ -558,164 +556,162 @@ def dict(self): citation = {} citation['fields'] = [] geospatial = {} + geospatial['fields'] = [] socialscience = {} + socialscience['fields'] = [] journal = {} + journal['fields'] = [] + tmp_list = [] - # TODO: prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! + """dataset""" + for attr in self.__attr_flat: + data['datasetVersion'][attr] = self.__getattribute__(attr) - """Dataset""" + """citation""" + if self.citation_displayName: + citation['displayName'] = self.citation_displayName - if self.license: - data['datasetVersion']['license'] = self.license - if self.termsOfUse: - data['datasetVersion']['termsOfUse'] = self.termsOfUse - if self.termsOfAccess: - data['datasetVersion']['termsOfAccess'] = self.termsOfAccess + for attr in self.__attr_citation_flat: + citation['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) + + for key, val in self.__attr_citation_arrays.items(): + # check if attribute exists + tmp_list = [] + if self.__getattribute__(key): + # loop over list of attribute dicts() + for d in self.__getattribute__(key): + tmp_dict = {} + # iterate over key-value pairs + for k, v in d.items(): + # check if key is in attribute list + if k in val: + tmp_dict[k] = {} + tmp_dict[k]['typeName'] = k + tmp_dict[k]['value'] = v + tmp_list.append(tmp_dict) + citation['fields'].append({ + 'typeName': key, + 'value': tmp_list + }) + + if self.__getattribute__('series'): + tmp_dict = {} + tmp_dict['value'] = {} + if 'seriesName' in self.__getattribute__('series'): + tmp_dict['value']['seriesName'] = {} + tmp_dict['value']['seriesName']['typeName'] = 'seriesName' + tmp_dict['value']['seriesName']['value'] = self.__getattribute__('seriesName') + if 'seriesInformation' in self.__getattribute__('series'): + tmp_dict['value']['seriesInformation'] = {} + tmp_dict['value']['seriesInformation']['typeName'] = 'seriesInformation' + tmp_dict['value']['seriesInformation']['value'] = self.__getattribute__('seriesInformation') + citation['fields'].append({ + 'typeName': 'series', + 'value': tmp_dict + }) - """Citation""" + """geospatial""" + for attr in self.__attr_geospatial_flat: + geospatial['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) + + for key, val in self.__attr_geospatial_arrays.items(): + # check if attribute exists + tmp_list = [] + if self.__getattribute__(key): + # loop over list of attribute dicts() + for d in self.__getattribute__(key): + tmp_dict = {} + # iterate over key-value pairs + for k, v in d.items(): + # check if key is in attribute list + if k in val: + tmp_dict[k] = {} + tmp_dict[k]['typeName'] = k + tmp_dict[k]['value'] = v + tmp_list.append(tmp_dict) + geospatial['fields'].append({ + 'typeName': key, + 'value': tmp_list + }) - # Fields[] - if self.citation_displayName: - citation['displayName'] = self.citation_displayName - if self.title: - citation['fields'].append({'title': self.title}) - if self.subtitle: - citation['fields'].append({'subtitle': self.subtitle}) - if self.alternativeTitle: - citation['fields'].append({'alternativeTitle': self.alternativeTitle}) - if self.alternativeURL: - citation['fields'].append({'alternativeURL': self.alternativeURL}) - if self.otherId: - pass - if self.author: - pass - if self.datasetContact: - pass - if self.dsDescription: - pass - if self.subject: - citation['fields'].append({'subject': self.subject}) - if self.keyword: - pass - if self.topicClassification: - pass - if self.publication: - pass 
- if self.notesText: - citation['fields'].append({'notesText': self.notesText}) - if self.producer: - pass - if self.productionDate: - citation['fields'].append({'productionDate': self.productionDate}) - if self.productionPlace: - citation['fields'].append({'productionPlace': self.productionPlace}) - if self.contributor: - pass - if self.grantNumber: - pass - if self.distributor: - pass - if self.distributionDate: - citation['fields'].append({'distributionDate': self.distributionDate}) - if self.depositor: - citation['fields'].append({'depositor': self.depositor}) - if self.dateOfDeposit: - citation['fields'].append({'dateOfDeposit': self.dateOfDeposit}) - if self.timePeriodCovered: - pass - if self.dateOfCollection: - pass - if self.kindOfData: - citation['fields'].append({'kindOfData': self.kindOfData}) - if self.series: - pass - if self.software: - pass - if self.relatedMaterial: - citation['fields'].append({'relatedMaterial': self.relatedMaterial}) - if self.relatedDatasets: - citation['fields'].append( - {'relatedDatasets': self.relatedDatasets}) - if self.otherReferences: - citation['fields'].append({'otherReferences': self.otherReferences}) - if self.dataSources: - citation['fields'].append({'dataSources': self.dataSources}) - if self.originOfSources: - citation['fields'].append( - {'originOfSources': self.originOfSources}) - if self.characteristicOfSources: - citation['fields'].append( - {'characteristicOfSources': self.characteristicOfSources}) - if self.accessToSources: - citation['fields'].append({'accessToSources': self.accessToSources}) - - """Geospatial""" - if self.geospatial_displayName: - data['geospatial_displayName'] = self.geospatial_displayName - if self.geographicCoverage: - pass - if self.geographicUnit: - data['geographicUnit'] = self.geographicUnit - if self.geographicBoundingBox: - pass - - """SocialScience""" - if self.socialscience_displayName: - data['socialscience_displayName'] = self.socialscience_displayName - if self.unitOfAnalysis: - data['unitOfAnalysis'] = self.unitOfAnalysis - if self.universe: - data['universe'] = self.universe - if self.timeMethod: - data['timeMethod'] = self.timeMethod - if self.dataCollector: - data['dataCollector'] = self.dataCollector - if self.collectorTraining: - data['collectorTraining'] = self.collectorTraining - if self.frequencyOfDataCollection: - data['frequencyOfDataCollection'] = self.frequencyOfDataCollection - if self.samplingProcedure: - data['samplingProcedure'] = self.samplingProcedure - if self.targetSampleSize: - pass - if self.deviationsFromSampleDesign: - data['deviationsFromSampleDesign'] = self.deviationsFromSampleDesign - if self.collectionMode: - data['collectionMode'] = self.collectionMode - if self.researchInstrument: - data['researchInstrument'] = self.researchInstrument - if self.dataCollectionSituation: - data['dataCollectionSituation'] = self.dataCollectionSituation - if self.actionsToMinimizeLoss: - data['actionsToMinimizeLoss'] = self.actionsToMinimizeLoss - if self.controlOperations: - data['controlOperations'] = self.controlOperations - if self.weighting: - data['weighting'] = self.weighting - if self.cleaningOperations: - data['cleaningOperations'] = self.cleaningOperations - if self.datasetLevelErrorNotes: - data['datasetLevelErrorNotes'] = self.datasetLevelErrorNotes - if self.responseRate: - data['responseRate'] = self.responseRate - if self.samplingErrorEstimates: - data['samplingErrorEstimates'] = self.samplingErrorEstimates - if self.otherDataAppraisal: - data['otherDataAppraisal'] = 
self.otherDataAppraisal - if self.socialScienceNotesType: - data['socialScienceNotesType'] = self.socialScienceNotesType - if self.socialScienceNotesSubject: - data['socialScienceNotesSubject'] = self.socialScienceNotesSubject - if self.socialScienceNotesText: - data['socialScienceNotesText'] = self.socialScienceNotesText - - """Journal""" - if self.journal_displayName: - data['journal_displayName'] = self.journal_displayName - if self.journalVolumeIssue: - pass - if self.journalArticleType: - data['journalArticleType'] = self.journalArticleType + """socialscience""" + + for attr in self.__attr_socialscience_flat: + socialscience['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) + + if self.__getattribute__('targetSampleSize'): + tmp_dict = {} + tmp_dict['value'] = {} + if 'targetSampleActualSize' in self.__getattribute__('targetSampleSize'): + tmp_dict['value']['targetSampleActualSize'] = {} + tmp_dict['value']['targetSampleActualSize']['typeName'] = 'targetSampleActualSize' + tmp_dict['value']['targetSampleActualSize']['value'] = self.__getattribute__('targetSampleActualSize') + if 'targetSampleSizeFormula' in self.__getattribute__('targetSampleSize'): + tmp_dict['value']['targetSampleSizeFormula'] = {} + tmp_dict['value']['targetSampleSizeFormula']['typeName'] = 'targetSampleSizeFormula' + tmp_dict['value']['targetSampleSizeFormula']['value'] = self.__getattribute__('targetSampleSizeFormula') + socialscience['fields'].append({ + 'typeName': 'series', + 'value': tmp_dict + }) + + if self.__getattribute__('socialScienceNotes'): + tmp_dict = {} + tmp_dict['value'] = {} + if 'socialScienceNotesType' in self.__getattribute__('socialScienceNotes'): + tmp_dict['value']['socialScienceNotesType'] = {} + tmp_dict['value']['socialScienceNotesType']['typeName'] = 'socialScienceNotesType' + tmp_dict['value']['socialScienceNotesType']['value'] = self.__getattribute__('socialScienceNotesType') + if 'socialScienceNotesSubject' in self.__getattribute__('socialScienceNotes'): + tmp_dict['value']['socialScienceNotesSubject'] = {} + tmp_dict['value']['socialScienceNotesSubject']['typeName'] = 'socialScienceNotesSubject' + tmp_dict['value']['socialScienceNotesSubject']['value'] = self.__getattribute__('socialScienceNotesSubject') + if 'socialScienceNotesText' in self.__getattribute__('socialScienceNotes'): + tmp_dict['value']['socialScienceNotesText'] = {} + tmp_dict['value']['socialScienceNotesText']['typeName'] = 'socialScienceNotesText' + tmp_dict['value']['socialScienceNotesText']['value'] = self.__getattribute__('socialScienceNotesText') + socialscience['fields'].append({ + 'typeName': 'series', + 'value': tmp_dict + }) + + """journal""" + for attr in self.__attr_journal_flat: + journal['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) + + for key, val in self.__attr_journal_arrays.items(): + # check if attribute exists + tmp_list = [] + if self.__getattribute__(key): + # loop over list of attribute dicts() + for d in self.__getattribute__(key): + tmp_dict = {} + # iterate over key-value pairs + for k, v in d.items(): + # check if key is in attribute list + if k in val: + tmp_dict[k] = {} + tmp_dict[k]['typeName'] = k + tmp_dict[k]['value'] = v + tmp_list.append(tmp_dict) + journal['fields'].append({ + 'typeName': key, + 'value': tmp_list + }) + + # TODO: prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! 
data['datasetVersion']['metadataBlocks']['citation'] = citation data['datasetVersion']['metadataBlocks']['socialscience'] = socialscience From 72b97f06d176fc9961cc53a3529187f71b6e5110 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Wed, 5 Jun 2019 11:08:23 +0200 Subject: [PATCH 15/46] remove and add test data json files --- tests/data/dataset_full.json | 1139 +++++++++++++++++ ...eate_dataset.json => dataset_minimum.json} | 0 ...ataverse.json => dataverse_minimum_1.json} | 0 ...averse_2.json => dataverse_minimum_2.json} | 0 ...averse_3.json => dataverse_minimum_3.json} | 0 tests/data/{add-user.json => user.json} | 0 tests/test_api.py | 4 +- 7 files changed, 1141 insertions(+), 2 deletions(-) create mode 100644 tests/data/dataset_full.json rename tests/data/{create_dataset.json => dataset_minimum.json} (100%) rename tests/data/{create_dataverse.json => dataverse_minimum_1.json} (100%) rename tests/data/{create_dataverse_2.json => dataverse_minimum_2.json} (100%) rename tests/data/{create_dataverse_3.json => dataverse_minimum_3.json} (100%) rename tests/data/{add-user.json => user.json} (100%) diff --git a/tests/data/dataset_full.json b/tests/data/dataset_full.json new file mode 100644 index 0000000..1a55795 --- /dev/null +++ b/tests/data/dataset_full.json @@ -0,0 +1,1139 @@ +{ + "datasetVersion": { + "license": "CC0", + "termsOfUse": "CC0 Waiver", + "termsOfAccess": "Terms of Access", + "metadataBlocks": { + "citation": { + "displayName": "Citation Metadata", + "fields": [ + { + "typeName": "title", + "multiple": false, + "typeClass": "primitive", + "value": "Replication Data for: Title" + }, + { + "typeName": "subtitle", + "multiple": false, + "typeClass": "primitive", + "value": "Subtitle" + }, + { + "typeName": "alternativeTitle", + "multiple": false, + "typeClass": "primitive", + "value": "Alternative Title" + }, + { + "typeName": "alternativeURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://AlternativeURL.org" + }, + { + "typeName": "otherId", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "otherIdAgency": { + "typeName": "otherIdAgency", + "multiple": false, + "typeClass": "primitive", + "value": "OtherIDAgency1" + }, + "otherIdValue": { + "typeName": "otherIdValue", + "multiple": false, + "typeClass": "primitive", + "value": "OtherIDIdentifier1" + } + } + ] + }, + { + "typeName": "author", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "authorName": { + "typeName": "authorName", + "multiple": false, + "typeClass": "primitive", + "value": "LastAuthor1, FirstAuthor1" + }, + "authorAffiliation": { + "typeName": "authorAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "AuthorAffiliation1" + }, + "authorIdentifierScheme": { + "typeName": "authorIdentifierScheme", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "ORCID" + }, + "authorIdentifier": { + "typeName": "authorIdentifier", + "multiple": false, + "typeClass": "primitive", + "value": "AuthorIdentifier1" + } + } + ] + }, + { + "typeName": "datasetContact", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "datasetContactName": { + "typeName": "datasetContactName", + "multiple": false, + "typeClass": "primitive", + "value": "LastContact1, FirstContact1" + }, + "datasetContactAffiliation": { + "typeName": "datasetContactAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "ContactAffiliation1" + }, + "datasetContactEmail": { + "typeName": "datasetContactEmail", + "multiple": false, + 
"typeClass": "primitive", + "value": "ContactEmail1@mailinator.com" + } + } + ] + }, + { + "typeName": "dsDescription", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "dsDescriptionValue": { + "typeName": "dsDescriptionValue", + "multiple": false, + "typeClass": "primitive", + "value": "DescriptionText2" + }, + "dsDescriptionDate": { + "typeName": "dsDescriptionDate", + "multiple": false, + "typeClass": "primitive", + "value": "1000-02-02" + } + } + ] + }, + { + "typeName": "subject", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Agricultural Sciences", + "Business and Management", + "Engineering", + "Law" + ] + }, + { + "typeName": "keyword", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "keywordValue": { + "typeName": "keywordValue", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordTerm1" + }, + "keywordVocabulary": { + "typeName": "keywordVocabulary", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordVocabulary1" + }, + "keywordVocabularyURI": { + "typeName": "keywordVocabularyURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://KeywordVocabularyURL1.org" + } + } + ] + }, + { + "typeName": "topicClassification", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "topicClassValue": { + "typeName": "topicClassValue", + "multiple": false, + "typeClass": "primitive", + "value": "Topic Class Value1" + }, + "topicClassVocab": { + "typeName": "topicClassVocab", + "multiple": false, + "typeClass": "primitive", + "value": "Topic Classification Vocabulary" + } + } + ] + }, + { + "typeName": "publication", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "publicationCitation": { + "typeName": "publicationCitation", + "multiple": false, + "typeClass": "primitive", + "value": "RelatedPublicationCitation1" + }, + "publicationIDType": { + "typeName": "publicationIDType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "ark" + }, + "publicationIDNumber": { + "typeName": "publicationIDNumber", + "multiple": false, + "typeClass": "primitive", + "value": "RelatedPublicationIDNumber1" + }, + "publicationURL": { + "typeName": "publicationURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://RelatedPublicationURL1.org" + } + } + ] + }, + { + "typeName": "notesText", + "multiple": false, + "typeClass": "primitive", + "value": "Notes1" + }, + { + "typeName": "producer", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "producerName": { + "typeName": "producerName", + "multiple": false, + "typeClass": "primitive", + "value": "LastProducer1, FirstProducer1" + }, + "producerAffiliation": { + "typeName": "producerAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "ProducerAffiliation1" + }, + "producerAbbreviation": { + "typeName": "producerAbbreviation", + "multiple": false, + "typeClass": "primitive", + "value": "ProducerAbbreviation1" + }, + "producerURL": { + "typeName": "producerURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://ProducerURL1.org" + }, + "producerLogoURL": { + "typeName": "producerLogoURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://ProducerLogoURL1.org" + } + } + ] + }, + { + "typeName": "productionDate", + "multiple": false, + "typeClass": "primitive", + "value": "1003-01-01" + }, + { + "typeName": "productionPlace", + "multiple": false, + "typeClass": "primitive", + "value": "ProductionPlace" + }, + { + 
"typeName": "contributor", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "contributorType": { + "typeName": "contributorType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Data Collector" + }, + "contributorName": { + "typeName": "contributorName", + "multiple": false, + "typeClass": "primitive", + "value": "LastContributor1, FirstContributor1" + } + } + ] + }, + { + "typeName": "grantNumber", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "grantNumberAgency": { + "typeName": "grantNumberAgency", + "multiple": false, + "typeClass": "primitive", + "value": "GrantInformationGrantAgency1" + }, + "grantNumberValue": { + "typeName": "grantNumberValue", + "multiple": false, + "typeClass": "primitive", + "value": "GrantInformationGrantNumber1" + } + } + ] + }, + { + "typeName": "distributor", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "distributorName": { + "typeName": "distributorName", + "multiple": false, + "typeClass": "primitive", + "value": "LastDistributor1, FirstDistributor1" + }, + "distributorAffiliation": { + "typeName": "distributorAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "DistributorAffiliation1" + }, + "distributorAbbreviation": { + "typeName": "distributorAbbreviation", + "multiple": false, + "typeClass": "primitive", + "value": "DistributorAbbreviation1" + }, + "distributorURL": { + "typeName": "distributorURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://DistributorURL1.org" + }, + "distributorLogoURL": { + "typeName": "distributorLogoURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://DistributorLogoURL1.org" + } + } + ] + }, + { + "typeName": "distributionDate", + "multiple": false, + "typeClass": "primitive", + "value": "1004-01-01" + }, + { + "typeName": "depositor", + "multiple": false, + "typeClass": "primitive", + "value": "LastDepositor, FirstDepositor" + }, + { + "typeName": "dateOfDeposit", + "multiple": false, + "typeClass": "primitive", + "value": "1002-01-01" + }, + { + "typeName": "timePeriodCovered", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "timePeriodCoveredStart": { + "typeName": "timePeriodCoveredStart", + "multiple": false, + "typeClass": "primitive", + "value": "1005-01-01" + }, + "timePeriodCoveredEnd": { + "typeName": "timePeriodCoveredEnd", + "multiple": false, + "typeClass": "primitive", + "value": "1005-01-02" + } + } + ] + }, + { + "typeName": "dateOfCollection", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "dateOfCollectionStart": { + "typeName": "dateOfCollectionStart", + "multiple": false, + "typeClass": "primitive", + "value": "1006-01-01" + }, + "dateOfCollectionEnd": { + "typeName": "dateOfCollectionEnd", + "multiple": false, + "typeClass": "primitive", + "value": "1006-01-01" + } + } + ] + }, + { + "typeName": "kindOfData", + "multiple": true, + "typeClass": "primitive", + "value": [ + "KindOfData1", + "KindOfData2" + ] + }, + { + "typeName": "series", + "multiple": false, + "typeClass": "compound", + "value": { + "seriesName": { + "typeName": "seriesName", + "multiple": false, + "typeClass": "primitive", + "value": "SeriesName" + }, + "seriesInformation": { + "typeName": "seriesInformation", + "multiple": false, + "typeClass": "primitive", + "value": "SeriesInformation" + } + } + }, + { + "typeName": "software", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "softwareName": { + "typeName": "softwareName", + 
"multiple": false, + "typeClass": "primitive", + "value": "SoftwareName1" + }, + "softwareVersion": { + "typeName": "softwareVersion", + "multiple": false, + "typeClass": "primitive", + "value": "SoftwareVersion1" + } + } + ] + }, + { + "typeName": "relatedMaterial", + "multiple": true, + "typeClass": "primitive", + "value": [ + "RelatedMaterial1", + "RelatedMaterial2" + ] + }, + { + "typeName": "relatedDatasets", + "multiple": true, + "typeClass": "primitive", + "value": [ + "RelatedDatasets1", + "RelatedDatasets2" + ] + }, + { + "typeName": "otherReferences", + "multiple": true, + "typeClass": "primitive", + "value": [ + "OtherReferences1", + "OtherReferences2" + ] + }, + { + "typeName": "dataSources", + "multiple": true, + "typeClass": "primitive", + "value": [ + "DataSources1", + "DataSources2" + ] + }, + { + "typeName": "originOfSources", + "multiple": false, + "typeClass": "primitive", + "value": "OriginOfSources" + }, + { + "typeName": "characteristicOfSources", + "multiple": false, + "typeClass": "primitive", + "value": "CharacteristicOfSourcesNoted" + }, + { + "typeName": "accessToSources", + "multiple": false, + "typeClass": "primitive", + "value": "DocumentationAndAccessToSources" + } + ] + }, + "geospatial": { + "displayName": "Geospatial Metadata", + "fields": [ + { + "typeName": "geographicCoverage", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "country": { + "typeName": "country", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Afghanistan" + }, + "state": { + "typeName": "state", + "multiple": false, + "typeClass": "primitive", + "value": "GeographicCoverageStateProvince1" + }, + "city": { + "typeName": "city", + "multiple": false, + "typeClass": "primitive", + "value": "GeographicCoverageCity1" + }, + "otherGeographicCoverage": { + "typeName": "otherGeographicCoverage", + "multiple": false, + "typeClass": "primitive", + "value": "GeographicCoverageOther1" + } + } + ] + }, + { + "typeName": "geographicUnit", + "multiple": true, + "typeClass": "primitive", + "value": [ + "GeographicUnit1", + "GeographicUnit2" + ] + }, + { + "typeName": "geographicBoundingBox", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "westLongitude": { + "typeName": "westLongitude", + "multiple": false, + "typeClass": "primitive", + "value": "10" + }, + "eastLongitude": { + "typeName": "eastLongitude", + "multiple": false, + "typeClass": "primitive", + "value": "20" + }, + "northLongitude": { + "typeName": "northLongitude", + "multiple": false, + "typeClass": "primitive", + "value": "30" + }, + "southLongitude": { + "typeName": "southLongitude", + "multiple": false, + "typeClass": "primitive", + "value": "40" + } + } + ] + } + ] + }, + "socialscience": { + "displayName": "Social Science and Humanities Metadata", + "fields": [ + { + "typeName": "unitOfAnalysis", + "multiple": true, + "typeClass": "primitive", + "value": [ + "UnitOfAnalysis1", + "UnitOfAnalysis2" + ] + }, + { + "typeName": "universe", + "multiple": true, + "typeClass": "primitive", + "value": [ + "Universe1", + "Universe2" + ] + }, + { + "typeName": "timeMethod", + "multiple": false, + "typeClass": "primitive", + "value": "TimeMethod" + }, + { + "typeName": "dataCollector", + "multiple": false, + "typeClass": "primitive", + "value": "LastDataCollector1, FirstDataCollector1" + }, + { + "typeName": "collectorTraining", + "multiple": false, + "typeClass": "primitive", + "value": "CollectorTraining" + }, + { + "typeName": "frequencyOfDataCollection", + "multiple": false, + 
"typeClass": "primitive", + "value": "Frequency" + }, + { + "typeName": "samplingProcedure", + "multiple": false, + "typeClass": "primitive", + "value": "SamplingProcedure" + }, + { + "typeName": "targetSampleSize", + "multiple": false, + "typeClass": "compound", + "value": { + "targetSampleActualSize": { + "typeName": "targetSampleActualSize", + "multiple": false, + "typeClass": "primitive", + "value": "100" + }, + "targetSampleSizeFormula": { + "typeName": "targetSampleSizeFormula", + "multiple": false, + "typeClass": "primitive", + "value": "TargetSampleSizeFormula" + } + } + }, + { + "typeName": "deviationsFromSampleDesign", + "multiple": false, + "typeClass": "primitive", + "value": "MajorDeviationsForSampleDesign" + }, + { + "typeName": "collectionMode", + "multiple": false, + "typeClass": "primitive", + "value": "CollectionMode" + }, + { + "typeName": "researchInstrument", + "multiple": false, + "typeClass": "primitive", + "value": "TypeOfResearchInstrument" + }, + { + "typeName": "dataCollectionSituation", + "multiple": false, + "typeClass": "primitive", + "value": "CharacteristicsOfDataCollectionSituation" + }, + { + "typeName": "actionsToMinimizeLoss", + "multiple": false, + "typeClass": "primitive", + "value": "ActionsToMinimizeLosses" + }, + { + "typeName": "controlOperations", + "multiple": false, + "typeClass": "primitive", + "value": "ControlOperations" + }, + { + "typeName": "weighting", + "multiple": false, + "typeClass": "primitive", + "value": "Weighting" + }, + { + "typeName": "cleaningOperations", + "multiple": false, + "typeClass": "primitive", + "value": "CleaningOperations" + }, + { + "typeName": "datasetLevelErrorNotes", + "multiple": false, + "typeClass": "primitive", + "value": "StudyLevelErrorNotes" + }, + { + "typeName": "responseRate", + "multiple": false, + "typeClass": "primitive", + "value": "ResponseRate" + }, + { + "typeName": "samplingErrorEstimates", + "multiple": false, + "typeClass": "primitive", + "value": "EstimatesOfSamplingError" + }, + { + "typeName": "otherDataAppraisal", + "multiple": false, + "typeClass": "primitive", + "value": "OtherFormsOfDataAppraisal" + }, + { + "typeName": "socialScienceNotes", + "multiple": false, + "typeClass": "compound", + "value": { + "socialScienceNotesType": { + "typeName": "socialScienceNotesType", + "multiple": false, + "typeClass": "primitive", + "value": "NotesType" + }, + "socialScienceNotesSubject": { + "typeName": "socialScienceNotesSubject", + "multiple": false, + "typeClass": "primitive", + "value": "NotesSubject" + }, + "socialScienceNotesText": { + "typeName": "socialScienceNotesText", + "multiple": false, + "typeClass": "primitive", + "value": "NotesText" + } + } + } + ] + }, + "astrophysics": { + "displayName": "Astronomy and Astrophysics Metadata", + "fields": [ + { + "typeName": "astroType", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Image", + "Mosaic", + "EventList", + "Cube" + ] + }, + { + "typeName": "astroFacility", + "multiple": true, + "typeClass": "primitive", + "value": [ + "Facility1", + "Facility2" + ] + }, + { + "typeName": "astroInstrument", + "multiple": true, + "typeClass": "primitive", + "value": [ + "Instrument1", + "Instrument2" + ] + }, + { + "typeName": "astroObject", + "multiple": true, + "typeClass": "primitive", + "value": [ + "Object1", + "Object2" + ] + }, + { + "typeName": "resolution.Spatial", + "multiple": false, + "typeClass": "primitive", + "value": "SpatialResolution" + }, + { + "typeName": "resolution.Spectral", + "multiple": false, + 
"typeClass": "primitive", + "value": "SpectralResolution" + }, + { + "typeName": "resolution.Temporal", + "multiple": false, + "typeClass": "primitive", + "value": "TimeResolution" + }, + { + "typeName": "coverage.Spectral.Bandpass", + "multiple": true, + "typeClass": "primitive", + "value": [ + "Bandpass1", + "Bandpass2" + ] + }, + { + "typeName": "coverage.Spectral.CentralWavelength", + "multiple": true, + "typeClass": "primitive", + "value": [ + "3001", + "3002" + ] + }, + { + "typeName": "coverage.Spectral.Wavelength", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "coverage.Spectral.MinimumWavelength": { + "typeName": "coverage.Spectral.MinimumWavelength", + "multiple": false, + "typeClass": "primitive", + "value": "4001" + }, + "coverage.Spectral.MaximumWavelength": { + "typeName": "coverage.Spectral.MaximumWavelength", + "multiple": false, + "typeClass": "primitive", + "value": "4002" + } + }, + { + "coverage.Spectral.MinimumWavelength": { + "typeName": "coverage.Spectral.MinimumWavelength", + "multiple": false, + "typeClass": "primitive", + "value": "4003" + }, + "coverage.Spectral.MaximumWavelength": { + "typeName": "coverage.Spectral.MaximumWavelength", + "multiple": false, + "typeClass": "primitive", + "value": "4004" + } + } + ] + }, + { + "typeName": "coverage.Temporal", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "coverage.Temporal.StartTime": { + "typeName": "coverage.Temporal.StartTime", + "multiple": false, + "typeClass": "primitive", + "value": "1007-01-01" + }, + "coverage.Temporal.StopTime": { + "typeName": "coverage.Temporal.StopTime", + "multiple": false, + "typeClass": "primitive", + "value": "1007-01-02" + } + }, + { + "coverage.Temporal.StartTime": { + "typeName": "coverage.Temporal.StartTime", + "multiple": false, + "typeClass": "primitive", + "value": "1007-02-01" + }, + "coverage.Temporal.StopTime": { + "typeName": "coverage.Temporal.StopTime", + "multiple": false, + "typeClass": "primitive", + "value": "1007-02-02" + } + } + ] + }, + { + "typeName": "coverage.Spatial", + "multiple": true, + "typeClass": "primitive", + "value": [ + "SkyCoverage1", + "SkyCoverage2" + ] + }, + { + "typeName": "coverage.Depth", + "multiple": false, + "typeClass": "primitive", + "value": "200" + }, + { + "typeName": "coverage.ObjectDensity", + "multiple": false, + "typeClass": "primitive", + "value": "300" + }, + { + "typeName": "coverage.ObjectCount", + "multiple": false, + "typeClass": "primitive", + "value": "400" + }, + { + "typeName": "coverage.SkyFraction", + "multiple": false, + "typeClass": "primitive", + "value": "500" + }, + { + "typeName": "coverage.Polarization", + "multiple": false, + "typeClass": "primitive", + "value": "Polarization" + }, + { + "typeName": "redshiftType", + "multiple": false, + "typeClass": "primitive", + "value": "RedshiftType" + }, + { + "typeName": "resolution.Redshift", + "multiple": false, + "typeClass": "primitive", + "value": "600" + }, + { + "typeName": "coverage.RedshiftValue", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "coverage.Redshift.MinimumValue": { + "typeName": "coverage.Redshift.MinimumValue", + "multiple": false, + "typeClass": "primitive", + "value": "701" + }, + "coverage.Redshift.MaximumValue": { + "typeName": "coverage.Redshift.MaximumValue", + "multiple": false, + "typeClass": "primitive", + "value": "702" + } + } + ] + } + ] + }, + "biomedical": { + "displayName": "Life Sciences Metadata", + "fields": [ + { + "typeName": "studyDesignType", + "multiple": true, + 
"typeClass": "controlledVocabulary", + "value": [ + "Case Control", + "Cross Sectional", + "Cohort Study", + "Not Specified" + ] + }, + { + "typeName": "studyFactorType", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Age", + "Biomarkers", + "Cell Surface Markers", + "Developmental Stage" + ] + }, + { + "typeName": "studyAssayOrganism", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Arabidopsis thaliana", + "Bos taurus", + "Caenorhabditis elegans", + "Danio rerio (zebrafish)" + ] + }, + { + "typeName": "studyAssayOtherOrganism", + "multiple": true, + "typeClass": "primitive", + "value": [ + "OtherOrganism1", + "OtherOrganism2" + ] + }, + { + "typeName": "studyAssayMeasurementType", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "cell counting", + "cell sorting", + "clinical chemistry analysis", + "DNA methylation profiling" + ] + }, + { + "typeName": "studyAssayOtherMeasurmentType", + "multiple": true, + "typeClass": "primitive", + "value": [ + "OtherMeasurementType1", + "OtherMeasurementType2" + ] + }, + { + "typeName": "studyAssayTechnologyType", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "culture based drug susceptibility testing, single concentration", + "culture based drug susceptibility testing, two concentrations", + "culture based drug susceptibility testing, three or more concentrations (minimium inhibitory concentration measurement)", + "flow cytometry" + ] + }, + { + "typeName": "studyAssayPlatform", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "210-MS GC Ion Trap (Varian)", + "220-MS GC Ion Trap (Varian)", + "225-MS GC Ion Trap (Varian)", + "300-MS quadrupole GC/MS (Varian)" + ] + }, + { + "typeName": "studyAssayCellType", + "multiple": true, + "typeClass": "primitive", + "value": [ + "CellType1", + "CellType2" + ] + } + ] + }, + "journal": { + "displayName": "Journal Metadata", + "fields": [ + { + "typeName": "journalVolumeIssue", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "journalVolume": { + "typeName": "journalVolume", + "multiple": false, + "typeClass": "primitive", + "value": "JournalVolume1" + }, + "journalIssue": { + "typeName": "journalIssue", + "multiple": false, + "typeClass": "primitive", + "value": "JournalIssue1" + }, + "journalPubDate": { + "typeName": "journalPubDate", + "multiple": false, + "typeClass": "primitive", + "value": "1008-01-01" + } + } + ] + }, + { + "typeName": "journalArticleType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "abstract" + } + ] + } + } + } +} diff --git a/tests/data/create_dataset.json b/tests/data/dataset_minimum.json similarity index 100% rename from tests/data/create_dataset.json rename to tests/data/dataset_minimum.json diff --git a/tests/data/create_dataverse.json b/tests/data/dataverse_minimum_1.json similarity index 100% rename from tests/data/create_dataverse.json rename to tests/data/dataverse_minimum_1.json diff --git a/tests/data/create_dataverse_2.json b/tests/data/dataverse_minimum_2.json similarity index 100% rename from tests/data/create_dataverse_2.json rename to tests/data/dataverse_minimum_2.json diff --git a/tests/data/create_dataverse_3.json b/tests/data/dataverse_minimum_3.json similarity index 100% rename from tests/data/create_dataverse_3.json rename to tests/data/dataverse_minimum_3.json diff --git a/tests/data/add-user.json b/tests/data/user.json similarity index 100% rename from tests/data/add-user.json rename to 
tests/data/user.json diff --git a/tests/test_api.py b/tests/test_api.py index 85da49c..8cfb88c 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -79,8 +79,8 @@ class TestApiRequests(object): def setup_class(cls): """Create the api connection for later use.""" cls.dataverse_id = 'test-pyDataverse' - cls.filename_dataverse = TEST_DIR+'/data/create_dataverse.json' - cls.filename_dataset = TEST_DIR+'/data/create_dataset.json' + cls.filename_dataverse = TEST_DIR+'/data/dataverse_minimum_1.json.json' + cls.filename_dataset = TEST_DIR+'/data/dataset_full.json.json' cls.api = Api(BASE_URL, api_token=API_TOKEN) sleep(SLEEP_TIME) assert cls.api From 7a8c75979bc289a1bbd4f11f53bc7b82a1586386 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Wed, 5 Jun 2019 11:53:45 +0200 Subject: [PATCH 16/46] add checks to is_valid in Dataset() --- src/pyDataverse/models.py | 373 +++++++++++++++++++++----------------- 1 file changed, 210 insertions(+), 163 deletions(-) diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py index 5fa5737..87d63a7 100644 --- a/src/pyDataverse/models.py +++ b/src/pyDataverse/models.py @@ -205,7 +205,6 @@ class Dataset(object): """Base class for the Dataset model.""" __attr_required = [ - 'displayName', 'title', 'author', 'datasetContact', @@ -399,12 +398,70 @@ def is_valid(self): """ is_valid = True + # check if all required attributes are set for attr in self.__attr_required: if not self.__getattribute__(attr): is_valid = False print('attribute \'{0}\' missing.'.format(attr)) + # check if attribute sets are complete where necessary + tp_cov = self.__getattribute__('timePeriodCovered') + if tp_cov: + for tp in tp_cov: + if tp['timePeriodCoveredStart'] or tp['timePeriodCoveredEnd']: + if not (tp['timePeriodCoveredStart'] and tp['timePeriodCoveredEnd']): + is_valid = False + + d_coll = self.__getattribute__('dateOfCollection') + if d_coll: + for d in d_coll: + if d['dateOfCollectionStart'] or d['dateOfCollectionEnd']: + if not (d['dateOfCollectionStart'] and d['dateOfCollectionEnd']): + is_valid = False + + authors = self.__getattribute__('author') + if authors: + for a in authors: + if a['authorAffiliation'] or a['authorIdentifierScheme'] or a['authorIdentifier']: + if not a['authorName']: + is_valid = False + + ds_contac = self.__getattribute__('datasetContact') + if ds_contac: + for c in ds_contac: + if c['datasetContactAffiliation'] or c['datasetContactEmail']: + if not c['datasetContactName']: + is_valid = False + + producer = self.__getattribute__('producer') + if producer: + for p in producer: + if p['producerAffiliation'] or p['producerAbbreviation'] or p['producerURL'] or p['producerLogoURL']: + if not p['producerName']: + is_valid = False + + contributor = self.__getattribute__('contributor') + if contributor: + for c in contributor: + if c['contributorType']: + if not c['contributorName']: + is_valid = False + + distributor = self.__getattribute__('distributor') + if distributor: + for d in distributor: + if d['distributorAffiliation'] or d['distributorAbbreviation'] or d['distributorURL'] or d['distributorLogoURL']: + if not d['distributorName']: + is_valid = False + + bbox = self.__getattribute__('geographicBoundingBox') + if bbox: + for b in bbox: + if b: + if not (b['westLongitude'] and b['eastLongitude'] and b['northLongitude'] and b['southLongitude']): + is_valid = False + return is_valid def import_metadata(self, filename, format): @@ -550,175 +607,165 @@ def dict(self): TODO: Validate standard """ - data = {} - data['datasetVersion'] = {} - 
data['datasetVersion']['metadataBlocks'] = {} - citation = {} - citation['fields'] = [] - geospatial = {} - geospatial['fields'] = [] - socialscience = {} - socialscience['fields'] = [] - journal = {} - journal['fields'] = [] - tmp_list = [] + if self.is_valid(): + data = {} + data['datasetVersion'] = {} + data['datasetVersion']['metadataBlocks'] = {} + citation = {} + citation['fields'] = [] + geospatial = {} + geospatial['fields'] = [] + socialscience = {} + socialscience['fields'] = [] + journal = {} + journal['fields'] = [] - """dataset""" - for attr in self.__attr_flat: - data['datasetVersion'][attr] = self.__getattribute__(attr) + """dataset""" + # Generate first level attributes + for attr in self.__attr_flat: + data['datasetVersion'][attr] = self.__getattribute__(attr) - """citation""" - if self.citation_displayName: - citation['displayName'] = self.citation_displayName - - for attr in self.__attr_citation_flat: - citation['fields'].append({ - 'typeName': attr, - 'value': self.__getattribute__(attr) - }) - - for key, val in self.__attr_citation_arrays.items(): - # check if attribute exists - tmp_list = [] - if self.__getattribute__(key): - # loop over list of attribute dicts() - for d in self.__getattribute__(key): - tmp_dict = {} - # iterate over key-value pairs - for k, v in d.items(): - # check if key is in attribute list - if k in val: - tmp_dict[k] = {} - tmp_dict[k]['typeName'] = k - tmp_dict[k]['value'] = v - tmp_list.append(tmp_dict) - citation['fields'].append({ - 'typeName': key, - 'value': tmp_list - }) - - if self.__getattribute__('series'): - tmp_dict = {} - tmp_dict['value'] = {} - if 'seriesName' in self.__getattribute__('series'): - tmp_dict['value']['seriesName'] = {} - tmp_dict['value']['seriesName']['typeName'] = 'seriesName' - tmp_dict['value']['seriesName']['value'] = self.__getattribute__('seriesName') - if 'seriesInformation' in self.__getattribute__('series'): - tmp_dict['value']['seriesInformation'] = {} - tmp_dict['value']['seriesInformation']['typeName'] = 'seriesInformation' - tmp_dict['value']['seriesInformation']['value'] = self.__getattribute__('seriesInformation') - citation['fields'].append({ - 'typeName': 'series', - 'value': tmp_dict - }) + """citation""" + if self.citation_displayName: + citation['displayName'] = self.citation_displayName + + # Generate first level attributes + for attr in self.__attr_citation_flat: + citation['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) + + # Generate fields attributes + for key, val in self.__attr_citation_arrays.items(): + citation['fields'].append({ + 'typeName': key, + 'value': self.__generate_dicts(key, val) + }) + + # Generate series attributes + if self.__getattribute__('series'): + tmp_dict = {} + tmp_dict['value'] = {} + if 'seriesName' in self.__getattribute__('series'): + tmp_dict['value']['seriesName'] = {} + tmp_dict['value']['seriesName']['typeName'] = 'seriesName' + tmp_dict['value']['seriesName']['value'] = self.__getattribute__('seriesName') + if 'seriesInformation' in self.__getattribute__('series'): + tmp_dict['value']['seriesInformation'] = {} + tmp_dict['value']['seriesInformation']['typeName'] = 'seriesInformation' + tmp_dict['value']['seriesInformation']['value'] = self.__getattribute__('seriesInformation') + citation['fields'].append({ + 'typeName': 'series', + 'value': tmp_dict + }) - """geospatial""" - for attr in self.__attr_geospatial_flat: - geospatial['fields'].append({ - 'typeName': attr, - 'value': self.__getattribute__(attr) - }) - - for key, 
val in self.__attr_geospatial_arrays.items(): - # check if attribute exists - tmp_list = [] - if self.__getattribute__(key): - # loop over list of attribute dicts() - for d in self.__getattribute__(key): - tmp_dict = {} - # iterate over key-value pairs - for k, v in d.items(): - # check if key is in attribute list - if k in val: - tmp_dict[k] = {} - tmp_dict[k]['typeName'] = k - tmp_dict[k]['value'] = v - tmp_list.append(tmp_dict) - geospatial['fields'].append({ - 'typeName': key, - 'value': tmp_list - }) + """geospatial""" + # Generate first level attributes + for attr in self.__attr_geospatial_flat: + geospatial['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) + + # Generate fields attributes + for key, val in self.__attr_geospatial_arrays.items(): + # check if attribute exists + geospatial['fields'].append({ + 'typeName': key, + 'value': self.__generate_dicts(key, val) + }) - """socialscience""" + """socialscience""" + # Generate first level attributes + for attr in self.__attr_socialscience_flat: + socialscience['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) + + # Generate targetSampleSize attributes + if self.__getattribute__('targetSampleSize'): + tmp_dict = {} + tmp_dict['value'] = {} + if 'targetSampleActualSize' in self.__getattribute__('targetSampleSize'): + tmp_dict['value']['targetSampleActualSize'] = {} + tmp_dict['value']['targetSampleActualSize']['typeName'] = 'targetSampleActualSize' + tmp_dict['value']['targetSampleActualSize']['value'] = self.__getattribute__('targetSampleActualSize') + if 'targetSampleSizeFormula' in self.__getattribute__('targetSampleSize'): + tmp_dict['value']['targetSampleSizeFormula'] = {} + tmp_dict['value']['targetSampleSizeFormula']['typeName'] = 'targetSampleSizeFormula' + tmp_dict['value']['targetSampleSizeFormula']['value'] = self.__getattribute__('targetSampleSizeFormula') + socialscience['fields'].append({ + 'typeName': 'series', + 'value': tmp_dict + }) + + # Generate socialScienceNotes attributes + if self.__getattribute__('socialScienceNotes'): + tmp_dict = {} + tmp_dict['value'] = {} + if 'socialScienceNotesType' in self.__getattribute__('socialScienceNotes'): + tmp_dict['value']['socialScienceNotesType'] = {} + tmp_dict['value']['socialScienceNotesType']['typeName'] = 'socialScienceNotesType' + tmp_dict['value']['socialScienceNotesType']['value'] = self.__getattribute__('socialScienceNotesType') + if 'socialScienceNotesSubject' in self.__getattribute__('socialScienceNotes'): + tmp_dict['value']['socialScienceNotesSubject'] = {} + tmp_dict['value']['socialScienceNotesSubject']['typeName'] = 'socialScienceNotesSubject' + tmp_dict['value']['socialScienceNotesSubject']['value'] = self.__getattribute__('socialScienceNotesSubject') + if 'socialScienceNotesText' in self.__getattribute__('socialScienceNotes'): + tmp_dict['value']['socialScienceNotesText'] = {} + tmp_dict['value']['socialScienceNotesText']['typeName'] = 'socialScienceNotesText' + tmp_dict['value']['socialScienceNotesText']['value'] = self.__getattribute__('socialScienceNotesText') + socialscience['fields'].append({ + 'typeName': 'series', + 'value': tmp_dict + }) - for attr in self.__attr_socialscience_flat: - socialscience['fields'].append({ - 'typeName': attr, - 'value': self.__getattribute__(attr) - }) + """journal""" + # Generate first level attributes + for attr in self.__attr_journal_flat: + journal['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) + + # Generate fields 
attributes + for key, val in self.__attr_journal_arrays.items(): + journal['fields'].append({ + 'typeName': key, + 'value': self.__generate_dicts(key, val) + }) + + # TODO: prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! + data['datasetVersion']['metadataBlocks']['citation'] = citation + data['datasetVersion']['metadataBlocks']['socialscience'] = socialscience + data['datasetVersion']['metadataBlocks']['geospatial'] = geospatial + data['datasetVersion']['metadataBlocks']['journal'] = journal - if self.__getattribute__('targetSampleSize'): - tmp_dict = {} - tmp_dict['value'] = {} - if 'targetSampleActualSize' in self.__getattribute__('targetSampleSize'): - tmp_dict['value']['targetSampleActualSize'] = {} - tmp_dict['value']['targetSampleActualSize']['typeName'] = 'targetSampleActualSize' - tmp_dict['value']['targetSampleActualSize']['value'] = self.__getattribute__('targetSampleActualSize') - if 'targetSampleSizeFormula' in self.__getattribute__('targetSampleSize'): - tmp_dict['value']['targetSampleSizeFormula'] = {} - tmp_dict['value']['targetSampleSizeFormula']['typeName'] = 'targetSampleSizeFormula' - tmp_dict['value']['targetSampleSizeFormula']['value'] = self.__getattribute__('targetSampleSizeFormula') - socialscience['fields'].append({ - 'typeName': 'series', - 'value': tmp_dict - }) - - if self.__getattribute__('socialScienceNotes'): - tmp_dict = {} - tmp_dict['value'] = {} - if 'socialScienceNotesType' in self.__getattribute__('socialScienceNotes'): - tmp_dict['value']['socialScienceNotesType'] = {} - tmp_dict['value']['socialScienceNotesType']['typeName'] = 'socialScienceNotesType' - tmp_dict['value']['socialScienceNotesType']['value'] = self.__getattribute__('socialScienceNotesType') - if 'socialScienceNotesSubject' in self.__getattribute__('socialScienceNotes'): - tmp_dict['value']['socialScienceNotesSubject'] = {} - tmp_dict['value']['socialScienceNotesSubject']['typeName'] = 'socialScienceNotesSubject' - tmp_dict['value']['socialScienceNotesSubject']['value'] = self.__getattribute__('socialScienceNotesSubject') - if 'socialScienceNotesText' in self.__getattribute__('socialScienceNotes'): - tmp_dict['value']['socialScienceNotesText'] = {} - tmp_dict['value']['socialScienceNotesText']['typeName'] = 'socialScienceNotesText' - tmp_dict['value']['socialScienceNotesText']['value'] = self.__getattribute__('socialScienceNotesText') - socialscience['fields'].append({ - 'typeName': 'series', - 'value': tmp_dict - }) + return data + else: + print('dict can not be created. Data is not valid') + return None - """journal""" - for attr in self.__attr_journal_flat: - journal['fields'].append({ - 'typeName': attr, - 'value': self.__getattribute__(attr) - }) - - for key, val in self.__attr_journal_arrays.items(): - # check if attribute exists - tmp_list = [] - if self.__getattribute__(key): - # loop over list of attribute dicts() - for d in self.__getattribute__(key): - tmp_dict = {} - # iterate over key-value pairs - for k, v in d.items(): - # check if key is in attribute list - if k in val: - tmp_dict[k] = {} - tmp_dict[k]['typeName'] = k - tmp_dict[k]['value'] = v - tmp_list.append(tmp_dict) - journal['fields'].append({ - 'typeName': key, - 'value': tmp_list - }) - - # TODO: prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! 
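For orientation, the compound fields rebuilt by this dict property all use the nested typeName/value layout of the Dataverse native API JSON (the same shape visible in tests/data/dataset_full.json above, minus the 'multiple' and 'typeClass' keys, which the model does not emit). A hand-written example with placeholder values, showing one author entry before and after conversion:

    # flat form, as stored on the Dataset model
    author = [{'authorName': 'Doe, John',
               'authorAffiliation': 'Example University'}]

    # nested form returned by __generate_dicts('author', ...), which the
    # dict property wraps as {'typeName': 'author', 'value': <this list>}
    converted = [{
        'authorName': {'typeName': 'authorName', 'value': 'Doe, John'},
        'authorAffiliation': {'typeName': 'authorAffiliation',
                              'value': 'Example University'}
    }]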
- - data['datasetVersion']['metadataBlocks']['citation'] = citation - data['datasetVersion']['metadataBlocks']['socialscience'] = socialscience - data['datasetVersion']['metadataBlocks']['geospatial'] = geospatial - data['datasetVersion']['metadataBlocks']['journal'] = journal - - return data + def __generate_dicts(self, key, val): + """Parse out list of dicts of metadata attributes for dict export.""" + # check if attribute exists + tmp_list = [] + if self.__getattribute__(key): + # loop over list of attribute dicts() + for d in self.__getattribute__(key): + tmp_dict = {} + # iterate over key-value pairs + for k, v in d.items(): + # check if key is in attribute list + if k in val: + tmp_dict[k] = {} + tmp_dict[k]['typeName'] = k + tmp_dict[k]['value'] = v + tmp_list.append(tmp_dict) + + return tmp_list @property def json(self): From 542d1321315f69e4dccf8a6274ac7f54c4c67eff Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Wed, 5 Jun 2019 15:42:54 +0200 Subject: [PATCH 17/46] add Datafiles() class with basic functionality --- src/pyDataverse/models.py | 110 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py index 87d63a7..553472c 100644 --- a/src/pyDataverse/models.py +++ b/src/pyDataverse/models.py @@ -298,6 +298,8 @@ class Dataset(object): def __init__(self): """Init Dataset() class.""" + self.datafiles = [] + """dataset""" self.license = None self.termsOfUse = None @@ -788,3 +790,111 @@ def export_metadata(self, filename, format): else: # TODO: Exception print('Data-format not right.') + + +class Datafile(object): + """Base class for the Datafile model.""" + + __attr_required = [ + 'filename', + 'pid' + ] + + __attr_flat = [ + 'description', + 'categories' + ] + + def __init__(self, filename=None, pid=None): + """Init `Datafile()` class.""" + self.filename = filename + self.pid = pid + self.description = None + self.categories = [] + + def __str__(self): + """Return name of Datafile() class for users.""" + return 'pyDataverse Datafile() model class.' + + def set(self, data): + """Set attributes. + + Takes a dict with Key-Value pairs containing Datafile metadata. + Keys: attribute name. named after dataverse up standard. + Value: attribute value. types must be compatible for dataverse up. + + """ + for key, val in data.items(): + self.__setattr__(key, val) + + def is_valid(self): + """Check if metadata stored in attributes is valid for dataverse api upload. + + more + + """ + is_valid = True + + for attr in self.__attr_required: + if not self.__getattribute__(attr): + is_valid = False + print('attribute \'{0}\' missing.'.format(attr)) + + return is_valid + + def import_metadata(self, filename, format): + """Import metadata.""" + data = {} + if format == 'dv_up': + metadata = read_file_json(filename) + + for key, val in metadata['datasetVersion'].items(): + if key in self.__attr_flat: + data[key] = val + self.set(data) + elif format == 'dv_down': + metadata = read_file_json(filename) + self.set(data) + else: + # TODO: Exception + print('Data-format not right') + + @property + def dict(self): + """Get Dataset metadata as dict for Dataverse API upload. + + TODO: Validate standard + + """ + if self.is_valid(): + data = {} + + for attr in self.__attr_flat: + data[attr] = self.__getattribute__(attr) + + return data + else: + print('dict can not be created. Data is not valid') + return None + + @property + def json(self): + """Get Datafile metadata as json for Dataverse API upload. 
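# Editor's note (illustrative sketch of the Datafile() validation added in
# this patch; the DOI value is made up):
#
#   df = Datafile()
#   df.is_valid()        # False - prints that 'filename' and 'pid' are missing
#   df.set({'filename': '1.csv', 'pid': 'doi:10.5072/FK2/ABCDEF'})
#   df.is_valid()        # True - both required attributes are set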
+ + TODO: Validate standard + TODO: Link to default json file + + """ + return dict_to_json(self.dict) + + def export_metadata(self, filename, format): + """Export data to different file-formats. + + format: `dv_up` + + """ + if format == 'dv_up': + return write_file_json(filename, self.dict) + else: + # TODO: Exception + print('Data-format not right.') From 7c1bff6478c0546a2c9f9c5f627c51b1ef908473 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Thu, 6 Jun 2019 16:28:48 +0200 Subject: [PATCH 18/46] add comment to internal data structures; fix bug with is_valid checks --- src/pyDataverse/models.py | 162 ++++++++++++++++++++++---------------- 1 file changed, 96 insertions(+), 66 deletions(-) diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py index 553472c..26041df 100644 --- a/src/pyDataverse/models.py +++ b/src/pyDataverse/models.py @@ -24,12 +24,14 @@ class Dataverse(object): """ + """Attributes required to Dataverse metadata json.""" __attr_required = [ 'alias', 'name', 'contactEmail' ] - __attr_flat = [ + """Attributes on first level of Dataverse metadata json.""" + __attr_valid = [ 'alias', 'name', 'affiliation', @@ -39,14 +41,17 @@ class Dataverse(object): def __init__(self): """Init `Dataverse()` class.""" + """Misc""" + self.datasets = [] + self.dataverses = [] + + """Metadata""" self.name = None self.alias = None self.contactEmail = [] self.affiliation = None self.description = None self.dataverseType = None - self.datasets = [] - self.dataverses = [] def __str__(self): """Return name of Dataverse() class for users.""" @@ -77,7 +82,7 @@ def is_valid(self): print('attribute \'{0}\' missing.'.format(attr)) return is_valid - def import_metadata(self, filename, format): + def import_metadata(self, filename, format='dv_up'): """Import data from different sources. It is allowed to import incomplete Dataverses, where required @@ -111,8 +116,9 @@ def import_metadata(self, filename, format): if format == 'dv_up': metadata = read_file_json(filename) # get first level metadata and parse it automatically - for attr in self.__attr_flat: - data[attr] = metadata[attr] + for attr in self.__attr_valid: + if attr in metadata: + data[attr] = metadata[attr] # get nested metadata and parse it manually if 'dataverseContacts' in metadata: @@ -143,11 +149,9 @@ def dict(self): mapped on the first level of the dataverse up metadata structure. This should help to shorten code """ - for attr in self.__attr_flat: + for attr in self.__attr_valid: if self.__getattribute__(attr): data[attr] = self.__getattribute__(attr) - else: - print('attr {0} not in data model.'.format(attr)) # prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! if self.contactEmail: @@ -188,7 +192,7 @@ def json(self): """ return dict_to_json(self.dict) - def export_metadata(self, filename, format): + def export_metadata(self, filename, format='dv_up'): """Export data to different file-formats. 
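# Editor's note (sketch of the behaviour the is_valid() changes below address,
# not part of the diff): optional keys may simply be absent from the metadata
# dicts, so the checks are switched from item access to membership tests:
#
#   a = {'authorAffiliation': 'AuthorAffiliation1'}
#   a['authorName']       # raises KeyError - the old check crashed here
#   'authorName' in a     # False - the new check just marks the data invalid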
format: `dv_up` @@ -204,6 +208,7 @@ def export_metadata(self, filename, format): class Dataset(object): """Base class for the Dataset model.""" + """Attributes required for Dataset metadata json.""" __attr_required = [ 'title', 'author', @@ -212,12 +217,17 @@ class Dataset(object): 'subject' ] - __attr_flat = [ + """Attributes on first level of Dataverse metadata json inside + [\'datasetVersion\'].""" + __attr_datasetVersion = [ 'license', 'termsOfUse', 'termsOfAccess' ] - __attr_citation_flat = [ + + """Attributes on first level of Dataverse metadata json inside + [\'datasetVersion\'][\'metadataBlocks\'][\'citation\'].""" + __attr_citation = [ 'title', 'subtitle', 'alternativeTitle', @@ -240,7 +250,9 @@ class Dataset(object): 'kindOfData' ] - __attr_citation_arrays = { + """Attributes in Dataverse metadata json inside + [\'datasetVersion\'][\'metadataBlocks\'][\'citation\'][\'fields\'].""" + __attr_citation_fields = { 'otherId': ['otherIdAgency', 'otherIdValue'], 'author': ['authorName', 'authorAffiliation', 'authorIdentifierScheme', 'authorIdentifier'], 'datasetContact': ['datasetContactName', 'datasetContactAffiliation', 'datasetContactEmail'], @@ -257,16 +269,22 @@ class Dataset(object): 'software': ['softwareName', 'softwareVersion'] } - __attr_geospatial_flat = [ + """Attributes on first level of Dataverse metadata json inside + [\'datasetVersion\'][\'metadataBlocks\'][\'geospatial\'].""" + __attr_geospatial = [ 'geographicUnit' ] - __attr_geospatial_arrays = { + """Attributes in Dataverse metadata json inside + [\'datasetVersion\'][\'metadataBlocks\'][\'geospatial\'][\'fields\'].""" + __attr_geospatial_fields = { 'geographicCoverage': ['country', 'state', 'city', 'otherGeographicCoverage'], 'geographicBoundingBox': ['westLongitude', 'eastLongitude', 'northLongitude', 'southLongitude'] } - __attr_socialscience_flat = [ + """Attributes on first level of Dataverse metadata json inside + [\'datasetVersion\'][\'metadataBlocks\'][\'socialscience\'].""" + __attr_socialscience = [ 'unitOfAnalysis', 'universe', 'timeMethod', @@ -288,24 +306,29 @@ class Dataset(object): 'otherDataAppraisal', ] - __attr_journal_flat = [ + """Attributes on first level of Dataverse metadata json inside + [\'datasetVersion\'][\'metadataBlocks\'][\'journal\'].""" + __attr_journal = [ 'journalArticleType' ] - __attr_journal_arrays = { + """Attributes in Dataverse metadata json inside + [\'datasetVersion\'][\'metadataBlocks\'][\'journal\'][\'fields\'].""" + __attr_journal_fields = { 'journalVolumeIssue': ['journalVolume', 'journalIssue', 'journalPubDate'] } def __init__(self): """Init Dataset() class.""" + """Misc""" self.datafiles = [] - """dataset""" + """Metadata: dataset""" self.license = None self.termsOfUse = None self.termsOfAccess = None - """citation""" + """Metadata: citation""" self.citation_displayName = None self.title = None self.subtitle = None @@ -342,13 +365,13 @@ def __init__(self): self.characteristicOfSources = None self.accessToSources = None - """geospatial""" + """Metadata: geospatial""" self.geospatial_displayName = None self.geographicCoverage = [] self.geographicUnit = None self.geographicBoundingBox = [] - """socialscience""" + """Metadata: socialscience""" self.socialscience_displayName = None self.unitOfAnalysis = [] self.universe = [] @@ -372,7 +395,7 @@ def __init__(self): self.samplingErrorEstimates = None self.otherDataAppraisal = None - """journal""" + """Metadata: journal""" self.journal_displayName = None self.journalVolumeIssue = [] self.journalArticleType = None @@ -411,62 
+434,62 @@ def is_valid(self): tp_cov = self.__getattribute__('timePeriodCovered') if tp_cov: for tp in tp_cov: - if tp['timePeriodCoveredStart'] or tp['timePeriodCoveredEnd']: - if not (tp['timePeriodCoveredStart'] and tp['timePeriodCoveredEnd']): + if 'timePeriodCoveredStart' in tp or 'timePeriodCoveredEnd' in tp: + if not ('timePeriodCoveredStart' in tp and 'timePeriodCoveredEnd' in tp): is_valid = False d_coll = self.__getattribute__('dateOfCollection') if d_coll: for d in d_coll: - if d['dateOfCollectionStart'] or d['dateOfCollectionEnd']: - if not (d['dateOfCollectionStart'] and d['dateOfCollectionEnd']): + if 'dateOfCollectionStart' in d or 'dateOfCollectionEnd' in d: + if not ('dateOfCollectionStart' in d and 'dateOfCollectionEnd' in d): is_valid = False authors = self.__getattribute__('author') if authors: for a in authors: - if a['authorAffiliation'] or a['authorIdentifierScheme'] or a['authorIdentifier']: - if not a['authorName']: + if 'authorAffiliation' in a or 'authorIdentifierScheme' in a or 'authorIdentifier' in a: + if not 'authorName' in a: is_valid = False ds_contac = self.__getattribute__('datasetContact') if ds_contac: for c in ds_contac: - if c['datasetContactAffiliation'] or c['datasetContactEmail']: - if not c['datasetContactName']: + if 'datasetContactAffiliation' in c or 'datasetContactEmail' in c: + if not 'datasetContactName' in c: is_valid = False producer = self.__getattribute__('producer') if producer: for p in producer: - if p['producerAffiliation'] or p['producerAbbreviation'] or p['producerURL'] or p['producerLogoURL']: + if 'producerAffiliation' in p or 'producerAbbreviation' in p or 'producerURL' in p or 'producerLogoURL' in p: if not p['producerName']: is_valid = False contributor = self.__getattribute__('contributor') if contributor: for c in contributor: - if c['contributorType']: - if not c['contributorName']: + if 'contributorType' in c: + if not 'contributorName' in c: is_valid = False distributor = self.__getattribute__('distributor') if distributor: for d in distributor: - if d['distributorAffiliation'] or d['distributorAbbreviation'] or d['distributorURL'] or d['distributorLogoURL']: - if not d['distributorName']: + if 'distributorAffiliation' in d or 'distributorAbbreviation' in d or 'distributorURL' in d or 'distributorLogoURL' in d: + if not 'distributorName' in d: is_valid = False bbox = self.__getattribute__('geographicBoundingBox') if bbox: for b in bbox: if b: - if not (b['westLongitude'] and b['eastLongitude'] and b['northLongitude'] and b['southLongitude']): + if not ('westLongitude' in b and 'eastLongitude' in b and 'northLongitude' in b and 'southLongitude' in b): is_valid = False return is_valid - def import_metadata(self, filename, format): + def import_metadata(self, filename, format='dv_up'): """Import metadata.""" data = {} if format == 'dv_up': @@ -474,7 +497,7 @@ def import_metadata(self, filename, format): """dataset""" # get first level metadata and parse it automatically for key, val in metadata['datasetVersion'].items(): - if key in self.__attr_flat: + if key in self.__attr_datasetVersion: data[key] = val # get nested metadata and parse it manually @@ -492,13 +515,13 @@ def import_metadata(self, filename, format): data['citation_displayName'] = citation['displayName'] for field in citation['fields']: - if field['typeName'] in self.__attr_citation_flat: + if field['typeName'] in self.__attr_citation: data[field['typeName']] = field['value'] - if field['typeName'] in self.__attr_citation_arrays: + if field['typeName'] in 
self.__attr_citation_fields: data[field['typeName']] = self.__parse_dicts( field['value'], - self.__attr_citation_arrays[field['typeName']]) + self.__attr_citation_fields[field['typeName']]) if field['typeName'] == 'series': if 'seriesName' in field['value']: @@ -516,13 +539,13 @@ def import_metadata(self, filename, format): self.__setattr__('geospatial_displayName', geospatial['displayName']) for field in geospatial['fields']: - if field['typeName'] in self.__attr_geospatial_flat: + if field['typeName'] in self.__attr_geospatial: data[field['typeName']] = field['value'] - if field['typeName'] in self.__attr_geospatial_arrays: + if field['typeName'] in self.__attr_geospatial_fields: data[field['typeName']] = self.__parse_dicts( field['value'], - self.__attr_geospatial_arrays[field['typeName']]) + self.__attr_geospatial_fields[field['typeName']]) else: # TODO: Exception print('geospatial not in json') @@ -534,7 +557,7 @@ def import_metadata(self, filename, format): self.__setattr__('socialscience_displayName', socialscience['displayName']) for field in socialscience['fields']: - if field['typeName'] in self.__attr_socialscience_flat: + if field['typeName'] in self.__attr_socialscience: data[field['typeName']] = field['value'] if field['typeName'] == 'targetSampleSize': @@ -561,13 +584,13 @@ def import_metadata(self, filename, format): self.__setattr__('journal_displayName', journal['displayName']) for field in journal['fields']: - if field['typeName'] in self.__attr_journal_flat: + if field['typeName'] in self.__attr_journal: data[field['typeName']] = field['value'] - if field['typeName'] in self.__attr_journal_arrays: + if field['typeName'] in self.__attr_journal_fields: data[field['typeName']] = self.__parse_dicts( field['value'], - self.__attr_journal_arrays[field['typeName']]) + self.__attr_journal_fields[field['typeName']]) else: # TODO: Exception print('journal not in json') @@ -624,7 +647,7 @@ def dict(self): """dataset""" # Generate first level attributes - for attr in self.__attr_flat: + for attr in self.__attr_datasetVersion: data['datasetVersion'][attr] = self.__getattribute__(attr) """citation""" @@ -632,14 +655,14 @@ def dict(self): citation['displayName'] = self.citation_displayName # Generate first level attributes - for attr in self.__attr_citation_flat: + for attr in self.__attr_citation: citation['fields'].append({ 'typeName': attr, 'value': self.__getattribute__(attr) }) # Generate fields attributes - for key, val in self.__attr_citation_arrays.items(): + for key, val in self.__attr_citation_fields.items(): citation['fields'].append({ 'typeName': key, 'value': self.__generate_dicts(key, val) @@ -664,14 +687,14 @@ def dict(self): """geospatial""" # Generate first level attributes - for attr in self.__attr_geospatial_flat: + for attr in self.__attr_geospatial: geospatial['fields'].append({ 'typeName': attr, 'value': self.__getattribute__(attr) }) # Generate fields attributes - for key, val in self.__attr_geospatial_arrays.items(): + for key, val in self.__attr_geospatial_fields.items(): # check if attribute exists geospatial['fields'].append({ 'typeName': key, @@ -680,7 +703,7 @@ def dict(self): """socialscience""" # Generate first level attributes - for attr in self.__attr_socialscience_flat: + for attr in self.__attr_socialscience: socialscience['fields'].append({ 'typeName': attr, 'value': self.__getattribute__(attr) @@ -726,14 +749,14 @@ def dict(self): """journal""" # Generate first level attributes - for attr in self.__attr_journal_flat: + for attr in 
self.__attr_journal: journal['fields'].append({ 'typeName': attr, 'value': self.__getattribute__(attr) }) # Generate fields attributes - for key, val in self.__attr_journal_arrays.items(): + for key, val in self.__attr_journal_fields.items(): journal['fields'].append({ 'typeName': key, 'value': self.__generate_dicts(key, val) @@ -779,7 +802,7 @@ def json(self): """ return dict_to_json(self.dict) - def export_metadata(self, filename, format): + def export_metadata(self, filename, format='dv_up'): """Export data to different file-formats. format: `dv_up` @@ -795,22 +818,28 @@ def export_metadata(self, filename, format): class Datafile(object): """Base class for the Datafile model.""" + """Attributes required for Datafile metadata json.""" __attr_required = [ 'filename', 'pid' ] - __attr_flat = [ + __attr_valid = [ 'description', - 'categories' + 'categories', + 'directoryLabel', + 'restrict' ] def __init__(self, filename=None, pid=None): """Init `Datafile()` class.""" + """Metadata""" self.filename = filename self.pid = pid self.description = None self.categories = [] + self.directoryLabel = None + self.restrict = None def __str__(self): """Return name of Datafile() class for users.""" @@ -842,15 +871,16 @@ def is_valid(self): return is_valid - def import_metadata(self, filename, format): + def import_metadata(self, filename, format='dv_up'): """Import metadata.""" data = {} if format == 'dv_up': metadata = read_file_json(filename) - for key, val in metadata['datasetVersion'].items(): - if key in self.__attr_flat: - data[key] = val + for attr in self.__attr_valid: + if attr in metadata: + data[attr] = metadata[attr] + self.set(data) elif format == 'dv_down': metadata = read_file_json(filename) @@ -869,7 +899,7 @@ def dict(self): if self.is_valid(): data = {} - for attr in self.__attr_flat: + for attr in self.__attr_valid: data[attr] = self.__getattribute__(attr) return data @@ -887,7 +917,7 @@ def json(self): """ return dict_to_json(self.dict) - def export_metadata(self, filename, format): + def export_metadata(self, filename, format='dv_up'): """Export data to different file-formats. format: `dv_up` From ca823c21d4c5dff2d2b71eb7f36f8b1dfbd334ae Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Thu, 6 Jun 2019 17:32:50 +0200 Subject: [PATCH 19/46] re-structure dict and json functions of Dataverse, Dataset and Datafile; add read_csv_to_dict() to utils --- src/pyDataverse/models.py | 440 ++++++++++++++++++++++---------------- src/pyDataverse/utils.py | 10 + 2 files changed, 265 insertions(+), 185 deletions(-) diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py index 26041df..3c5cb47 100644 --- a/src/pyDataverse/models.py +++ b/src/pyDataverse/models.py @@ -3,7 +3,6 @@ """Find out more at https://github.com/AUSSDA/pyDataverse.""" from __future__ import absolute_import from pyDataverse.utils import dict_to_json -from pyDataverse.utils import json_to_dict from pyDataverse.utils import read_file_json from pyDataverse.utils import write_file_json @@ -38,6 +37,10 @@ class Dataverse(object): 'description', 'dataverseType' ] + __attr_misc = [ + 'datasets', + 'dataverses' + ] def __init__(self): """Init `Dataverse()` class.""" @@ -91,6 +94,8 @@ def import_metadata(self, filename, format='dv_up'): Simmply parse in the data. No validation needed. This will be done later before the export. 
+ format: dv_up + Example: Default dataverse metadata json: { "name": "Scientific Research", @@ -133,41 +138,48 @@ def import_metadata(self, filename, format='dv_up'): self.set(data) else: # TODO: Exception - print('Data-format not right') + print('Data-format not right.') - @property - def dict(self): + def dict(self, format='dv_up'): """Get Dataverse metadata as dict for Dataverse API upload. TODO: Validate standard + format: all, dv_up + + if data is valid will be checked here. so it is not necessary anymore in json(). + """ - if self.is_valid(): - data = {} - """ - dv_attr_list contains all metadata related attributes, which are - mapped on the first level of the dataverse up metadata structure. - This should help to shorten code - """ - for attr in self.__attr_valid: - if self.__getattribute__(attr): - data[attr] = self.__getattribute__(attr) + data = {} + if format == 'dv_up': + if self.is_valid(): + for attr in self.__attr_valid: + if self.__getattribute__(attr): + data[attr] = self.__getattribute__(attr) + + # prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! + if self.contactEmail: + data['dataverseContacts'] = [] + for email in self.contactEmail: + data['dataverseContacts'].append({'contactEmail': email}) + else: + print('Key contactEmail not in data model.') - # prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! - if self.contactEmail: - data['dataverseContacts'] = [] - for email in self.contactEmail: - data['dataverseContacts'].append({'contactEmail': email}) + return data else: - print('Key contactEmail not in data model.') - + print('dict can not be created. Data is not valid for format') + return None + elif format == 'all': + for attr in self.__attr_misc + self.__attr_valid: + if self.__getattribute__(attr): + data[attr] = self.__getattribute__(attr) return data else: - print('dict can not be created. Data is not valid') + # TODO: Exception + print('Format not right for dict.') return None - @property - def json(self): + def json(self, format='dv_up'): """Get Dataverse metadata as json for Dataverse API upload. TODO: Validate standard @@ -190,16 +202,24 @@ def json(self): } """ - return dict_to_json(self.dict) + if format == 'dv_up': + return dict_to_json(self.dict()) + elif format == 'all': + return dict_to_json(self.dict('all')) + else: + # TODO Exception + print('data format not valid.') def export_metadata(self, filename, format='dv_up'): """Export data to different file-formats. + exports only to metadata standards. + format: `dv_up` """ if format == 'dv_up': - return write_file_json(filename, self.dict) + return write_file_json(filename, self.dict()) else: # TODO: Exception print('Data-format not right.') @@ -413,7 +433,8 @@ def set(self, data): """ for key, val in data.items(): - self.__setattr__(key, val) + if key: + self.__setattr__(key, val) def is_valid(self): """Check if metadata stored in attributes is valid for dataverse api upload. @@ -490,7 +511,11 @@ def is_valid(self): return is_valid def import_metadata(self, filename, format='dv_up'): - """Import metadata.""" + """Import metadata. + + format: dv_up + + """ data = {} if format == 'dv_up': metadata = read_file_json(filename) @@ -625,152 +650,166 @@ def __parse_dicts(self, data, attr_list): return data_tmp - @property - def dict(self): + def dict(self, format='dv_up'): """Get Dataset metadata as dict for Dataverse API upload. 
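# Editor's note (usage sketch under the refactor in this patch; file paths are
# shortened/made up): dict and json are no longer properties but methods that
# take a format argument.
#
#   dv = Dataverse()
#   dv.import_metadata('dataverse_minimum_1.json')  # format defaults to 'dv_up'
#   upload = dv.dict()            # nested structure for the Dataverse API
#   everything = dv.dict('all')   # plain dump of every attribute that is set
#   dv.export_metadata('dataverse_out.json')        # writes the 'dv_up' dict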
TODO: Validate standard - """ - if self.is_valid(): - data = {} - data['datasetVersion'] = {} - data['datasetVersion']['metadataBlocks'] = {} - citation = {} - citation['fields'] = [] - geospatial = {} - geospatial['fields'] = [] - socialscience = {} - socialscience['fields'] = [] - journal = {} - journal['fields'] = [] - - """dataset""" - # Generate first level attributes - for attr in self.__attr_datasetVersion: - data['datasetVersion'][attr] = self.__getattribute__(attr) - - """citation""" - if self.citation_displayName: - citation['displayName'] = self.citation_displayName - - # Generate first level attributes - for attr in self.__attr_citation: - citation['fields'].append({ - 'typeName': attr, - 'value': self.__getattribute__(attr) - }) - - # Generate fields attributes - for key, val in self.__attr_citation_fields.items(): - citation['fields'].append({ - 'typeName': key, - 'value': self.__generate_dicts(key, val) - }) - - # Generate series attributes - if self.__getattribute__('series'): - tmp_dict = {} - tmp_dict['value'] = {} - if 'seriesName' in self.__getattribute__('series'): - tmp_dict['value']['seriesName'] = {} - tmp_dict['value']['seriesName']['typeName'] = 'seriesName' - tmp_dict['value']['seriesName']['value'] = self.__getattribute__('seriesName') - if 'seriesInformation' in self.__getattribute__('series'): - tmp_dict['value']['seriesInformation'] = {} - tmp_dict['value']['seriesInformation']['typeName'] = 'seriesInformation' - tmp_dict['value']['seriesInformation']['value'] = self.__getattribute__('seriesInformation') - citation['fields'].append({ - 'typeName': 'series', - 'value': tmp_dict - }) - - """geospatial""" - # Generate first level attributes - for attr in self.__attr_geospatial: - geospatial['fields'].append({ - 'typeName': attr, - 'value': self.__getattribute__(attr) - }) - - # Generate fields attributes - for key, val in self.__attr_geospatial_fields.items(): - # check if attribute exists - geospatial['fields'].append({ - 'typeName': key, - 'value': self.__generate_dicts(key, val) - }) - - """socialscience""" - # Generate first level attributes - for attr in self.__attr_socialscience: - socialscience['fields'].append({ - 'typeName': attr, - 'value': self.__getattribute__(attr) - }) - - # Generate targetSampleSize attributes - if self.__getattribute__('targetSampleSize'): - tmp_dict = {} - tmp_dict['value'] = {} - if 'targetSampleActualSize' in self.__getattribute__('targetSampleSize'): - tmp_dict['value']['targetSampleActualSize'] = {} - tmp_dict['value']['targetSampleActualSize']['typeName'] = 'targetSampleActualSize' - tmp_dict['value']['targetSampleActualSize']['value'] = self.__getattribute__('targetSampleActualSize') - if 'targetSampleSizeFormula' in self.__getattribute__('targetSampleSize'): - tmp_dict['value']['targetSampleSizeFormula'] = {} - tmp_dict['value']['targetSampleSizeFormula']['typeName'] = 'targetSampleSizeFormula' - tmp_dict['value']['targetSampleSizeFormula']['value'] = self.__getattribute__('targetSampleSizeFormula') - socialscience['fields'].append({ - 'typeName': 'series', - 'value': tmp_dict - }) - - # Generate socialScienceNotes attributes - if self.__getattribute__('socialScienceNotes'): - tmp_dict = {} - tmp_dict['value'] = {} - if 'socialScienceNotesType' in self.__getattribute__('socialScienceNotes'): - tmp_dict['value']['socialScienceNotesType'] = {} - tmp_dict['value']['socialScienceNotesType']['typeName'] = 'socialScienceNotesType' - tmp_dict['value']['socialScienceNotesType']['value'] = 
self.__getattribute__('socialScienceNotesType') - if 'socialScienceNotesSubject' in self.__getattribute__('socialScienceNotes'): - tmp_dict['value']['socialScienceNotesSubject'] = {} - tmp_dict['value']['socialScienceNotesSubject']['typeName'] = 'socialScienceNotesSubject' - tmp_dict['value']['socialScienceNotesSubject']['value'] = self.__getattribute__('socialScienceNotesSubject') - if 'socialScienceNotesText' in self.__getattribute__('socialScienceNotes'): - tmp_dict['value']['socialScienceNotesText'] = {} - tmp_dict['value']['socialScienceNotesText']['typeName'] = 'socialScienceNotesText' - tmp_dict['value']['socialScienceNotesText']['value'] = self.__getattribute__('socialScienceNotesText') - socialscience['fields'].append({ - 'typeName': 'series', - 'value': tmp_dict - }) - - """journal""" - # Generate first level attributes - for attr in self.__attr_journal: - journal['fields'].append({ - 'typeName': attr, - 'value': self.__getattribute__(attr) - }) - - # Generate fields attributes - for key, val in self.__attr_journal_fields.items(): - journal['fields'].append({ - 'typeName': key, - 'value': self.__generate_dicts(key, val) - }) - - # TODO: prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! - data['datasetVersion']['metadataBlocks']['citation'] = citation - data['datasetVersion']['metadataBlocks']['socialscience'] = socialscience - data['datasetVersion']['metadataBlocks']['geospatial'] = geospatial - data['datasetVersion']['metadataBlocks']['journal'] = journal + format: dv_up, all + """ + if format == 'dv_up': + if self.is_valid(): + data = {} + data['datasetVersion'] = {} + data['datasetVersion']['metadataBlocks'] = {} + citation = {} + citation['fields'] = [] + geospatial = {} + geospatial['fields'] = [] + socialscience = {} + socialscience['fields'] = [] + journal = {} + journal['fields'] = [] + + """dataset""" + # Generate first level attributes + for attr in self.__attr_datasetVersion: + data['datasetVersion'][attr] = self.__getattribute__(attr) + + """citation""" + if self.citation_displayName: + citation['displayName'] = self.citation_displayName + + # Generate first level attributes + for attr in self.__attr_citation: + citation['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) + + # Generate fields attributes + for key, val in self.__attr_citation_fields.items(): + citation['fields'].append({ + 'typeName': key, + 'value': self.__generate_dicts(key, val) + }) + + # Generate series attributes + if self.__getattribute__('series'): + tmp_dict = {} + tmp_dict['value'] = {} + if 'seriesName' in self.__getattribute__('series'): + tmp_dict['value']['seriesName'] = {} + tmp_dict['value']['seriesName']['typeName'] = 'seriesName' + tmp_dict['value']['seriesName']['value'] = self.__getattribute__('seriesName') + if 'seriesInformation' in self.__getattribute__('series'): + tmp_dict['value']['seriesInformation'] = {} + tmp_dict['value']['seriesInformation']['typeName'] = 'seriesInformation' + tmp_dict['value']['seriesInformation']['value'] = self.__getattribute__('seriesInformation') + citation['fields'].append({ + 'typeName': 'series', + 'value': tmp_dict + }) + + """geospatial""" + # Generate first level attributes + for attr in self.__attr_geospatial: + geospatial['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) + + # Generate fields attributes + for key, val in self.__attr_geospatial_fields.items(): + # check if attribute exists + geospatial['fields'].append({ + 'typeName': key, + 'value': 
self.__generate_dicts(key, val) + }) + + """socialscience""" + # Generate first level attributes + for attr in self.__attr_socialscience: + socialscience['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) + + # Generate targetSampleSize attributes + if self.__getattribute__('targetSampleSize'): + tmp_dict = {} + tmp_dict['value'] = {} + if 'targetSampleActualSize' in self.__getattribute__('targetSampleSize'): + tmp_dict['value']['targetSampleActualSize'] = {} + tmp_dict['value']['targetSampleActualSize']['typeName'] = 'targetSampleActualSize' + tmp_dict['value']['targetSampleActualSize']['value'] = self.__getattribute__('targetSampleActualSize') + if 'targetSampleSizeFormula' in self.__getattribute__('targetSampleSize'): + tmp_dict['value']['targetSampleSizeFormula'] = {} + tmp_dict['value']['targetSampleSizeFormula']['typeName'] = 'targetSampleSizeFormula' + tmp_dict['value']['targetSampleSizeFormula']['value'] = self.__getattribute__('targetSampleSizeFormula') + socialscience['fields'].append({ + 'typeName': 'series', + 'value': tmp_dict + }) + + # Generate socialScienceNotes attributes + if self.__getattribute__('socialScienceNotes'): + tmp_dict = {} + tmp_dict['value'] = {} + if 'socialScienceNotesType' in self.__getattribute__('socialScienceNotes'): + tmp_dict['value']['socialScienceNotesType'] = {} + tmp_dict['value']['socialScienceNotesType']['typeName'] = 'socialScienceNotesType' + tmp_dict['value']['socialScienceNotesType']['value'] = self.__getattribute__('socialScienceNotesType') + if 'socialScienceNotesSubject' in self.__getattribute__('socialScienceNotes'): + tmp_dict['value']['socialScienceNotesSubject'] = {} + tmp_dict['value']['socialScienceNotesSubject']['typeName'] = 'socialScienceNotesSubject' + tmp_dict['value']['socialScienceNotesSubject']['value'] = self.__getattribute__('socialScienceNotesSubject') + if 'socialScienceNotesText' in self.__getattribute__('socialScienceNotes'): + tmp_dict['value']['socialScienceNotesText'] = {} + tmp_dict['value']['socialScienceNotesText']['typeName'] = 'socialScienceNotesText' + tmp_dict['value']['socialScienceNotesText']['value'] = self.__getattribute__('socialScienceNotesText') + socialscience['fields'].append({ + 'typeName': 'series', + 'value': tmp_dict + }) + + """journal""" + # Generate first level attributes + for attr in self.__attr_journal: + journal['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) + + # Generate fields attributes + for key, val in self.__attr_journal_fields.items(): + journal['fields'].append({ + 'typeName': key, + 'value': self.__generate_dicts(key, val) + }) + + # TODO: prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! + data['datasetVersion']['metadataBlocks']['citation'] = citation + data['datasetVersion']['metadataBlocks']['socialscience'] = socialscience + data['datasetVersion']['metadataBlocks']['geospatial'] = geospatial + data['datasetVersion']['metadataBlocks']['journal'] = journal + + return data + else: + print('dict can not be created. 
Data is not valid for format') + return None + elif format == 'all': + attr_lst = self.__attr_datasetVersion + self.__attr_citation + self.__attr_geospatial + self.__attr_socialscience + self.__attr_journal + for key, val in self.__attr_citation_fields.update(self.__attr_geospatial_fields.update(self.__attr_journal_fields)): + attr_lst.append(key) + for attr in attr_lst: + if self.__getattribute__(attr): + data[attr] = self.__getattribute__(attr) return data + else: - print('dict can not be created. Data is not valid') + print('dict can not be created. Format is not valid') return None def __generate_dicts(self, key, val): @@ -792,15 +831,20 @@ def __generate_dicts(self, key, val): return tmp_list - @property - def json(self): + def json(self, format='dv_up'): """Get Dataset metadata as json for Dataverse API upload. TODO: Validate standard TODO: Link to default json file """ - return dict_to_json(self.dict) + if format == 'dv_up': + return dict_to_json(self.dict()) + elif format == 'all': + return dict_to_json(self.dict('all')) + else: + # TODO Exception + print('data format not valid.') def export_metadata(self, filename, format='dv_up'): """Export data to different file-formats. @@ -809,7 +853,7 @@ def export_metadata(self, filename, format='dv_up'): """ if format == 'dv_up': - return write_file_json(filename, self.dict) + return write_file_json(filename, self.dict()) else: # TODO: Exception print('Data-format not right.') @@ -824,18 +868,26 @@ class Datafile(object): 'pid' ] + """Attributes on first level of Datafile metadata json.""" __attr_valid = [ 'description', 'categories', 'directoryLabel', 'restrict' ] + """Attributes on first level of Datafile metadata json.""" + __attr_misc = [ + 'pid', + 'filename' + ] def __init__(self, filename=None, pid=None): """Init `Datafile()` class.""" - """Metadata""" - self.filename = filename + """Misc""" self.pid = pid + self.filename = filename + + """Metadata""" self.description = None self.categories = [] self.directoryLabel = None @@ -872,7 +924,11 @@ def is_valid(self): return is_valid def import_metadata(self, filename, format='dv_up'): - """Import metadata.""" + """Import metadata. + + format: dv_up + + """ data = {} if format == 'dv_up': metadata = read_file_json(filename) @@ -884,38 +940,52 @@ def import_metadata(self, filename, format='dv_up'): self.set(data) elif format == 'dv_down': metadata = read_file_json(filename) - self.set(data) + self.set(metadata) else: # TODO: Exception print('Data-format not right') - @property - def dict(self): + def dict(self, format='dv_up'): """Get Dataset metadata as dict for Dataverse API upload. TODO: Validate standard """ - if self.is_valid(): - data = {} + if format == 'dv_up': + if self.is_valid(): + data = {} - for attr in self.__attr_valid: - data[attr] = self.__getattribute__(attr) + for attr in self.__attr_valid: + data[attr] = self.__getattribute__(attr) + return data + else: + print('dict can not be created. Data is not valid') + return None + elif format == 'all': + for attr in self.__attr_misc + self.__attr_valid: + if self.__getattribute__(attr): + data[attr] = self.__getattribute__(attr) return data else: - print('dict can not be created. Data is not valid') + # TODO: Exception + print('Format not right for dict.') return None - @property - def json(self): + def json(self, format='dv_up'): """Get Datafile metadata as json for Dataverse API upload. 
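# Editor's note (illustrative sketch of the extended Datafile model; the pid
# value is made up):
#
#   df = Datafile(filename='data.csv', pid='doi:10.5072/FK2/ABCDEF')
#   df.set({'description': 'Survey data', 'restrict': True})
#   df.dict()
#   # -> {'description': 'Survey data', 'categories': [],
#   #     'directoryLabel': None, 'restrict': True}
#   df.json()   # the same structure serialized via dict_to_json()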
TODO: Validate standard TODO: Link to default json file """ - return dict_to_json(self.dict) + if format == 'dv_up': + return dict_to_json(self.dict()) + elif format == 'all': + return dict_to_json(self.dict('all')) + else: + # TODO Exception + print('data format not valid.') def export_metadata(self, filename, format='dv_up'): """Export data to different file-formats. diff --git a/src/pyDataverse/utils.py b/src/pyDataverse/utils.py index 0f07f77..607dbfd 100644 --- a/src/pyDataverse/utils.py +++ b/src/pyDataverse/utils.py @@ -1,6 +1,7 @@ # !/usr/bin/env python # -*- coding: utf-8 -*- """Find out more at https://github.com/AUSSDA/pyDataverse.""" +import csv import json @@ -138,3 +139,12 @@ def write_file_json(filename, data, mode='w'): """ write_file(filename, dict_to_json(data), mode) + + +def read_csv_to_dict(filename): + """Read in csv file and convert it into a list of dicts.""" + reader = csv.DictReader(open(filename), 'r') + data = [] + for row in reader: + data.append(row) + return data From 4597a93a3bcfb525ca9182eabc002468d9d3843b Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Thu, 6 Jun 2019 18:17:43 +0200 Subject: [PATCH 20/46] rename read_csv_to_dict(); add docstring to csv_to_dict(); change attribute name from doi to pid in Datafile() --- src/pyDataverse/models.py | 4 +++- src/pyDataverse/utils.py | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py index 3c5cb47..a187b3d 100644 --- a/src/pyDataverse/models.py +++ b/src/pyDataverse/models.py @@ -39,7 +39,8 @@ class Dataverse(object): ] __attr_misc = [ 'datasets', - 'dataverses' + 'dataverses', + 'pid' ] def __init__(self): @@ -47,6 +48,7 @@ def __init__(self): """Misc""" self.datasets = [] self.dataverses = [] + self.pid = None """Metadata""" self.name = None diff --git a/src/pyDataverse/utils.py b/src/pyDataverse/utils.py index 607dbfd..f0ea96c 100644 --- a/src/pyDataverse/utils.py +++ b/src/pyDataverse/utils.py @@ -141,10 +141,22 @@ def write_file_json(filename, data, mode='w'): write_file(filename, dict_to_json(data), mode) -def read_csv_to_dict(filename): - """Read in csv file and convert it into a list of dicts.""" +def csv_to_dict(filename): + """Read in csv file and convert it into a list of dicts. + + Parameters + ---------- + filename : string + Filename with full path. + + Returns + ------- + dict + List with dicts for each row. + + """ reader = csv.DictReader(open(filename), 'r') data = [] for row in reader: - data.append(row) + data.append(dict(row)) return data From 117b7e06b5f4ed8eac1b622edea1ddb63833339c Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Sat, 8 Jun 2019 01:54:08 +0200 Subject: [PATCH 21/46] minor updates in utils.py and models.py --- src/pyDataverse/models.py | 2 -- src/pyDataverse/utils.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py index a187b3d..0faaf71 100644 --- a/src/pyDataverse/models.py +++ b/src/pyDataverse/models.py @@ -158,7 +158,6 @@ def dict(self, format='dv_up'): for attr in self.__attr_valid: if self.__getattribute__(attr): data[attr] = self.__getattribute__(attr) - # prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! if self.contactEmail: data['dataverseContacts'] = [] @@ -166,7 +165,6 @@ def dict(self, format='dv_up'): data['dataverseContacts'].append({'contactEmail': email}) else: print('Key contactEmail not in data model.') - return data else: print('dict can not be created. 
Data is not valid for format') diff --git a/src/pyDataverse/utils.py b/src/pyDataverse/utils.py index f0ea96c..e9d92b0 100644 --- a/src/pyDataverse/utils.py +++ b/src/pyDataverse/utils.py @@ -119,7 +119,7 @@ def read_file_json(filename): """ try: - return json.loads(read_file(filename, 'r')) + return json_to_dict(read_file(filename, 'r')) except Exception as e: raise e From 04adf6e944352e4e784f233f3c9f67f5ecb5502a Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Sat, 8 Jun 2019 01:55:59 +0200 Subject: [PATCH 22/46] add tests for Dataverse() and Dataset() --- tests/conftest.py | 44 +++++++++++ tests/test_api.py | 42 +++------- tests/test_models_dataset.py | 108 ++++++++++++++++++++++++++ tests/test_models_dataverse.py | 137 +++++++++++++++++++++++++++++++++ 4 files changed, 300 insertions(+), 31 deletions(-) create mode 100644 tests/conftest.py create mode 100644 tests/test_models_dataset.py create mode 100644 tests/test_models_dataverse.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..2d02cd3 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,44 @@ +import json +import os +from pyDataverse.api import Api +import pytest + + +@pytest.fixture(scope='module') +def api_connection(): + api_token = os.environ['API_TOKEN'] + base_url = os.environ['BASE_URL'] + return Api(base_url, api_token) + + +@pytest.fixture +def read_json(filename): + j2d(read_file(filename, 'r')) + + +@pytest.fixture +def read_file(filename): + with open(filename, 'r') as f: + data = f.read() + return data + + +@pytest.fixture +def write_file(filename, data): + with open(filename, 'w') as f: + f.write(data) + + +@pytest.fixture +def write_json(filename, data): + write_file(filename, d2j(data)) + + +@pytest.fixture +def j2d(data): + return json.loads(data) + + +@pytest.fixture +def d2j(data): + json.dumps(data, ensure_ascii=False, indent=2) diff --git a/tests/test_api.py b/tests/test_api.py index 8cfb88c..7000722 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -11,16 +11,7 @@ TEST_DIR = os.path.dirname(os.path.realpath(__file__)) -SLEEP_TIME = 1 - -if 'API_TOKEN' in os.environ: - API_TOKEN = os.environ['API_TOKEN'] -else: - print('ERROR: Environment variable API_TOKEN for test missing.') -if 'BASE_URL' in os.environ: - BASE_URL = os.environ['BASE_URL'] -else: - print('ERROR: Environment variable BASE_URL for test missing.') +SLEEP_TIME = 0.1 class TestApiConnect(object): @@ -28,17 +19,15 @@ class TestApiConnect(object): def test_api_connect(self): """Test successfull connection without api_token.""" - api = Api(BASE_URL) + api = Api(os.environ['BASE_URL']) sleep(SLEEP_TIME) - time_window_start = datetime.now() - timedelta(seconds=10) assert isinstance(api, Api) assert not api.api_token assert api.api_version == 'v1' - assert api.conn_started > time_window_start assert isinstance(api.conn_started, datetime) - assert api.base_url == BASE_URL + assert api.base_url == os.environ['BASE_URL'] assert api.native_api_base_url == '{0}/api/{1}'.format( - BASE_URL, api.api_version) + os.environ['BASE_URL'], api.api_version) assert api.status == 'OK' def test_api_connect_base_url_wrong(self): @@ -48,10 +37,8 @@ def test_api_connect_base_url_wrong(self): base_url = 'http://wikipedia.org' api = Api(base_url) sleep(SLEEP_TIME) - time_window_start = datetime.now() - timedelta(seconds=10) assert not api.api_token assert api.api_version == 'v1' - assert api.conn_started > time_window_start assert api.base_url == 'http://wikipedia.org' assert api.native_api_base_url == 
'http://wikipedia.org/api/v1' assert api.status == 'ERROR' @@ -61,10 +48,8 @@ def test_api_connect_base_url_wrong(self): base_url = None api = Api(base_url) sleep(SLEEP_TIME) - time_window_start = datetime.now() - timedelta(seconds=10) assert not api.api_token assert api.api_version == 'v1' - assert api.conn_started > time_window_start assert not api.base_url assert not api.native_api_base_url assert api.status == 'ERROR' @@ -79,25 +64,20 @@ class TestApiRequests(object): def setup_class(cls): """Create the api connection for later use.""" cls.dataverse_id = 'test-pyDataverse' - cls.filename_dataverse = TEST_DIR+'/data/dataverse_minimum_1.json.json' - cls.filename_dataset = TEST_DIR+'/data/dataset_full.json.json' - cls.api = Api(BASE_URL, api_token=API_TOKEN) - sleep(SLEEP_TIME) - assert cls.api - assert cls.api.api_token - assert cls.api.base_url - def test_get_request(self): + def test_get_request(self, api_connection): """Test successfull `.get_request()` request.""" # TODO: test params und auth default + api = api_connection query_str = '/info/server' - resp = self.api.get_request(query_str) + resp = api.get_request(query_str) sleep(SLEEP_TIME) - assert self.api.status == 'OK' + assert api.status == 'OK' assert isinstance(resp, Response) - def test_get_dataverse(self): + def test_get_dataverse(self, api_connection): """Test successfull `.get_dataverse()` request`.""" - resp = self.api.get_dataverse(':root') + api = api_connection + resp = api.get_dataverse(':root') sleep(SLEEP_TIME) assert isinstance(resp, Response) diff --git a/tests/test_models_dataset.py b/tests/test_models_dataset.py new file mode 100644 index 0000000..0a53ee0 --- /dev/null +++ b/tests/test_models_dataset.py @@ -0,0 +1,108 @@ +# coding: utf-8 +import pytest +from pyDataverse.models import Dataset + + +class TestDataset(object): + """Test the Dataset() class initalization.""" + + def test_dataset_init(self): + pass + + def test_dataset_set_dvup(self): + pass + + def test_dataset_set_dvup_less(self): + pass + + def test_dataset_set_dvup_more(self): + pass + + def test_dataset_is_valid(self): + pass + + def test_dataset_is_valid_not(self): + pass + + def test_dataset_import_metadata_dv_up(self): + + """Dataset""" + assert self.license == + assert self.termsOfUse == + assert self.termsOfAccess == + + """Citation""" + assert self.citation_displayName == + assert self.title == + assert self.subtitle == + assert self.alternativeTitle == + assert self.alternativeURL == + assert self.otherId = [] + assert self.author = [] + assert self.datasetContact = [] + assert self.dsDescription = [] + assert self.subject = [] + assert self.keyword = [] + assert self.topicClassification = [] + assert self.publication = [] + assert self.notesText == + assert self.producer = [] + assert self.productionDate == + assert self.productionPlace == + assert self.contributor = [] + assert self.grantNumber = [] + assert self.distributor = [] + assert self.distributionDate == + assert self.depositor == + assert self.dateOfDeposit == + assert self.timePeriodCovered = [] + assert self.dateOfCollection = [] + assert self.kindOfData = [] + assert self.series = [] + assert self.software = [] + assert self.relatedMaterial = [] + assert self.relatedDatasets = [] + assert self.otherReferences = [] + assert self.dataSources = [] + assert self.originOfSources == + assert self.characteristicOfSources == + assert self.accessToSources == + + """Geospatial""" + assert self.geospatial_displayName == + assert self.geographicCoverage = [] + assert 
self.geographicUnit == + assert self.geographicBoundingBox = [] + + def test_dataset_import_metadata_wrong(self): + pass + + def test_dataset_dict_dv_up_valid_minimum(self): + pass + + def test_dataset_dict_dv_up_valid_full(self): + pass + + def test_dataset_dict_dv_up_valid_not(self): + pass + + def test_dataset_dict_all(self): + pass + + def test_dataset_dict_wrong(self): + pass + + def test_dataset_json_dv_up(self): + pass + + def test_dataset_json_all(self): + pass + + def test_dataset_json_wrong(self): + pass + + def test_dataset_export_metadata_dv_up(self): + pass + + def test_dataset_export_metadata_wrong(self): + pass diff --git a/tests/test_models_dataverse.py b/tests/test_models_dataverse.py new file mode 100644 index 0000000..3148717 --- /dev/null +++ b/tests/test_models_dataverse.py @@ -0,0 +1,137 @@ +# coding: utf-8 +import pytest +from pyDataverse.models import Dataverse + + +class TestDataverse(object): + """Test the Api() class initalization.""" + + def test_dataverse_init(self): + dv = Dataverse() + + assert isinstance(dv.datasets, list) + assert not dv.datasets + assert isinstance(dv.dataverses, list) + assert not dv.dataverses + assert not dv.pid + assert not dv.name + assert not dv.alias + assert isinstance(dv.contactEmail, list) + assert not dv.contactEmail + assert not dv.affiliation + assert not dv.description + assert not dv.dataverseType + + def test_dataverse_set_dv_up(self, read_json): + data = read_json('data/dataverse_minimum_1.json') + dv = Dataverse() + dv.set(data) + + assert isinstance(dv.datasets, list) + assert not dv.datasets + assert isinstance(dv.dataverses, list) + assert not dv.dataverses + assert not dv.pid + assert dv.alias == 'test-pyDataverse' + assert dv.name == 'Test pyDataverse' + assert isinstance(dv.dataverseContacts, list) + assert len(dv.dataverseContacts) == 1 + assert dv.dataverseContact[0]['contactEmail'] == 'info@aussda.at' + + + def test_dataverse_is_valid(self): + data = read_json('data/dataverse_minimum_1.json') + dv = Dataverse() + dv.set(data) + + assert isinstance(dv.datasets, list) + assert not dv.datasets + assert isinstance(dv.dataverses, list) + assert not dv.dataverses + assert not dv.pid + assert dv.alias == 'test-pyDataverse' + assert dv.name == 'Test pyDataverse' + assert isinstance(dv.dataverseContacts, list) + assert len(dv.dataverseContacts) == 1 + assert dv.dataverseContact[0]['contactEmail'] == 'info@aussda.at' + assert dv.is_valid() + + def test_dataverse_is_valid_not(self): + data = read_json('data/dataverse_minimum_1.json') + dv = Dataverse() + dv.set(data) + dv.name = None + + assert not dv.is_valid() + assert isinstance(dv.datasets, list) + assert not dv.datasets + assert isinstance(dv.dataverses, list) + assert not dv.dataverses + assert not dv.pid + assert dv.alias == 'test-pyDataverse' + assert not dv.name + assert isinstance(dv.dataverseContacts, list) + assert len(dv.dataverseContacts) == 1 + assert dv.dataverseContact[0]['contactEmail'] == 'info@aussda.at' + + def test_dataverse_import_metadata_dv_up(self): + dv = Dataverse() + dv.import_metadata('data/dataverse_minimum_1.json') + + assert isinstance(dv.datasets, list) + assert not dv.datasets + assert isinstance(dv.dataverses, list) + assert not dv.dataverses + assert not dv.pid + assert dv.alias == 'test-pyDataverse' + assert dv.name == 'Test pyDataverse' + assert isinstance(dv.dataverseContacts, list) + assert len(dv.dataverseContacts) == 1 + assert dv.dataverseContact[0]['contactEmail'] == 'info@aussda.at' + + def 
test_dataverse_import_metadata_wrong(self): + dv = Dataverse() + dv.import_metadata('data/dataverse_minimum_1.json', 'wrong_data-format') + + assert isinstance(dv.datasets, list) + assert not dv.datasets + assert isinstance(dv.dataverses, list) + assert not dv.dataverses + assert not dv.pid + assert not dv.name + assert not dv.alias + assert isinstance(dv.contactEmail, list) + assert not dv.contactEmail + assert not dv.affiliation + assert not dv.description + assert not dv.dataverseType + + def test_dataverse_dict_dv_up_valid_minimum(self): + pass + + def test_dataverse_dict_dv_up_valid_full(self): + pass + + def test_dataverse_dict_dv_up_valid_not(self): + pass + + def test_dataverse_dict_all(self): + pass + + def test_dataverse_dict_wrong(self): + pass + + def test_dataverse_json_dv_up(self): + pass + + def test_dataverse_json_all(self): + pass + + def test_dataverse_json_wrong(self): + pass + + def test_dataverse_export_metadata_dv_up(self): + pass + + def test_dataverse_export_metadata_wrong(self): + pass From 0734e355b7e494458bb2f42bd77617b40ab635cc Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Sat, 8 Jun 2019 16:39:10 +0200 Subject: [PATCH 23/46] add tests for ds.import_metadata and Dataverse(); fix minor issues appearing after tests --- src/pyDataverse/models.py | 37 +++--- tests/test_models_dataset.py | 221 +++++++++++++++++++++++++-------- tests/test_models_dataverse.py | 30 ++--- 3 files changed, 204 insertions(+), 84 deletions(-) diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py index 0faaf71..708d09b 100644 --- a/src/pyDataverse/models.py +++ b/src/pyDataverse/models.py @@ -27,7 +27,7 @@ class Dataverse(object): __attr_required = [ 'alias', 'name', - 'contactEmail' + 'dataverseContacts' ] """Attributes on first level of Dataverse metadata json.""" __attr_valid = [ @@ -35,6 +35,7 @@ class Dataverse(object): 'name', 'affiliation', 'description', + 'dataverseContacts' 'dataverseType' ] __attr_misc = [ @@ -53,7 +54,7 @@ def __init__(self): """Metadata""" self.name = None self.alias = None - self.contactEmail = [] + self.dataverseContacts = [] self.affiliation = None self.description = None self.dataverseType = None @@ -126,14 +127,6 @@ def import_metadata(self, filename, format='dv_up'): for attr in self.__attr_valid: if attr in metadata: data[attr] = metadata[attr] - - # get nested metadata and parse it manually - if 'dataverseContacts' in metadata: - data['contactEmail'] = [] - for contact in metadata['dataverseContacts']: - for key, val in contact.items(): - if key == 'contactEmail': - data['contactEmail'].append(val) self.set(data) elif format == 'dv_down': metadata = read_file_json(filename) @@ -158,13 +151,7 @@ def dict(self, format='dv_up'): for attr in self.__attr_valid: if self.__getattribute__(attr): data[attr] = self.__getattribute__(attr) - # prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! - if self.contactEmail: - data['dataverseContacts'] = [] - for email in self.contactEmail: - data['dataverseContacts'].append({'contactEmail': email}) - else: - print('Key contactEmail not in data model.') + # TODO: prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! return data else: print('dict can not be created. 
Data is not valid for format') @@ -228,6 +215,10 @@ def export_metadata(self, filename, format='dv_up'): class Dataset(object): """Base class for the Dataset model.""" + __attr_misc = [ + 'datafiles' + ] + """Attributes required for Dataset metadata json.""" __attr_required = [ 'title', @@ -260,6 +251,8 @@ class Dataset(object): 'depositor', 'dateOfDeposit', 'kindOfData', + 'seriesName', + 'seriesInformation', 'relatedMaterial', 'relatedDatasets', 'otherReferences', @@ -375,7 +368,8 @@ def __init__(self): self.timePeriodCovered = [] self.dateOfCollection = [] self.kindOfData = [] - self.series = [] + self.seriesName = None + self.seriesInformation = None self.software = [] self.relatedMaterial = [] self.relatedDatasets = [] @@ -400,8 +394,11 @@ def __init__(self): self.collectorTraining = None self.frequencyOfDataCollection = None self.samplingProcedure = None - self.targetSampleSize = [] - self.socialScienceNotes = [] + self.targetSampleActualSize = None + self.targetSampleSizeFormula = None + self.socialScienceNotesType = None + self.socialScienceNotesSubject = None + self.socialScienceNotesText = None self.deviationsFromSampleDesign = None self.collectionMode = None self.researchInstrument = None diff --git a/tests/test_models_dataset.py b/tests/test_models_dataset.py index 0a53ee0..fa79704 100644 --- a/tests/test_models_dataset.py +++ b/tests/test_models_dataset.py @@ -1,7 +1,9 @@ # coding: utf-8 -import pytest +import os from pyDataverse.models import Dataset +TEST_DIR = os.path.dirname(os.path.realpath(__file__)) + class TestDataset(object): """Test the Dataset() class initalization.""" @@ -25,54 +27,175 @@ def test_dataset_is_valid_not(self): pass def test_dataset_import_metadata_dv_up(self): - - """Dataset""" - assert self.license == - assert self.termsOfUse == - assert self.termsOfAccess == - - """Citation""" - assert self.citation_displayName == - assert self.title == - assert self.subtitle == - assert self.alternativeTitle == - assert self.alternativeURL == - assert self.otherId = [] - assert self.author = [] - assert self.datasetContact = [] - assert self.dsDescription = [] - assert self.subject = [] - assert self.keyword = [] - assert self.topicClassification = [] - assert self.publication = [] - assert self.notesText == - assert self.producer = [] - assert self.productionDate == - assert self.productionPlace == - assert self.contributor = [] - assert self.grantNumber = [] - assert self.distributor = [] - assert self.distributionDate == - assert self.depositor == - assert self.dateOfDeposit == - assert self.timePeriodCovered = [] - assert self.dateOfCollection = [] - assert self.kindOfData = [] - assert self.series = [] - assert self.software = [] - assert self.relatedMaterial = [] - assert self.relatedDatasets = [] - assert self.otherReferences = [] - assert self.dataSources = [] - assert self.originOfSources == - assert self.characteristicOfSources == - assert self.accessToSources == - - """Geospatial""" - assert self.geospatial_displayName == - assert self.geographicCoverage = [] - assert self.geographicUnit == - assert self.geographicBoundingBox = [] + ds = Dataset() + ds.import_metadata(TEST_DIR + '/data/dataset_full.json') + + """dataset""" + assert ds.license == 'CC0' + assert ds.termsOfUse == 'CC0 Waiver' + assert ds.termsOfAccess == 'Terms of Access' + + """citation""" + assert ds.citation_displayName == 'Citation Metadata' + assert ds.title == 'Replication Data for: Title' + assert ds.subtitle == 'Subtitle' + assert ds.alternativeTitle == 'Alternative Title' + assert 
ds.alternativeURL == 'http://AlternativeURL.org' + assert isinstance(ds.otherId, list) + assert len(ds.otherId) == 1 + for d in ds.otherId: + assert d['otherIdAgency'] in ['OtherIDAgency1'] + assert d['otherIdValue'] in ['OtherIDIdentifier1'] + assert isinstance(ds.author, list) + assert len(ds.author) == 1 + for d in ds.author: + assert d['authorName'] in ['LastAuthor1, FirstAuthor1'] + assert d['authorAffiliation'] in ['AuthorAffiliation1'] + assert d['authorIdentifierScheme'] in ['ORCID'] + assert d['authorIdentifier'] in ['AuthorIdentifier1'] + assert isinstance(ds.datasetContact, list) + assert len(ds.datasetContact) == 1 + for d in ds.datasetContact: + assert d['datasetContactName'] in ['LastContact1, FirstContact1'] + assert d['datasetContactAffiliation'] in ['ContactAffiliation1'] + assert d['datasetContactEmail'] in ['ContactEmail1@mailinator.com'] + assert isinstance(ds.dsDescription, list) + assert len(ds.dsDescription) == 1 + for d in ds.dsDescription: + assert d['dsDescriptionValue'] in ['DescriptionText2'] + assert d['dsDescriptionDate'] in ['1000-02-02'] + assert ds.subject == ['Agricultural Sciences', + 'Business and Management', 'Engineering', 'Law'] + assert isinstance(ds.keyword, list) + assert len(ds.keyword) == 1 + for d in ds.keyword: + assert d['keywordValue'] in ['KeywordTerm1'] + assert d['keywordVocabulary'] in ['KeywordVocabulary1'] + assert d['keywordVocabularyURI'] in ['http://KeywordVocabularyURL1.org'] + assert isinstance(ds.topicClassification, list) + assert len(ds.topicClassification) == 1 + for d in ds.topicClassification: + assert d['topicClassValue'] in ['Topic Class Value1'] + assert d['topicClassVocab'] in ['Topic Classification Vocabulary'] + assert isinstance(ds.publication, list) + assert len(ds.publication) == 1 + for d in ds.publication: + assert d['publicationCitation'] in ['RelatedPublicationCitation1'] + assert d['publicationIDType'] in ['ark'] + assert d['publicationIDNumber'] in ['RelatedPublicationIDNumber1'] + assert d['publicationURL'] in ['http://RelatedPublicationURL1.org'] + assert ds.notesText == 'Notes1' + assert isinstance(ds.producer, list) + assert len(ds.producer) == 1 + for d in ds.producer: + assert d['producerName'] in ['LastProducer1, FirstProducer1'] + assert d['producerAffiliation'] in ['ProducerAffiliation1'] + assert d['producerAbbreviation'] in ['ProducerAbbreviation1'] + assert d['producerURL'] in ['http://ProducerURL1.org'] + assert d['producerLogoURL'] in ['http://ProducerLogoURL1.org'] + assert ds.productionDate == '1003-01-01' + assert ds.productionPlace == 'ProductionPlace' + assert isinstance(ds.contributor, list) + assert len(ds.contributor) == 1 + for d in ds.contributor: + assert d['contributorType'] in ['Data Collector'] + assert d['contributorName'] in ['LastContributor1, FirstContributor1'] + assert isinstance(ds.grantNumber, list) + assert len(ds.grantNumber) == 1 + for d in ds.grantNumber: + assert d['grantNumberAgency'] in ['GrantInformationGrantAgency1'] + assert d['grantNumberValue'] in ['GrantInformationGrantNumber1'] + assert isinstance(ds.distributor, list) + assert len(ds.distributor) == 1 + for d in ds.distributor: + assert d['distributorName'] in ['LastDistributor1, FirstDistributor1'] + assert d['distributorAffiliation'] in ['DistributorAffiliation1'] + assert d['distributorAbbreviation'] in ['DistributorAbbreviation1'] + assert d['distributorURL'] in ['http://DistributorURL1.org'] + assert d['distributorLogoURL'] in ['http://DistributorLogoURL1.org'] + assert ds.distributionDate == '1004-01-01' 
+ assert ds.depositor == 'LastDepositor, FirstDepositor' + assert ds.dateOfDeposit == '1002-01-01' + assert isinstance(ds.timePeriodCovered, list) + assert len(ds.timePeriodCovered) == 1 + for d in ds.timePeriodCovered: + assert d['timePeriodCoveredStart'] in ['1005-01-01'] + assert d['timePeriodCoveredEnd'] in ['1005-01-02'] + assert isinstance(ds.dateOfCollection, list) + assert len(ds.dateOfCollection) == 1 + for d in ds.dateOfCollection: + assert d['dateOfCollectionStart'] in ['1006-01-01'] + assert d['dateOfCollectionEnd'] in ['1006-01-01'] + assert ds.kindOfData == ['KindOfData1', 'KindOfData2'] + assert ds.seriesName == 'SeriesName' + assert ds.seriesInformation == 'SeriesInformation' + assert isinstance(ds.software, list) + assert len(ds.software) == 1 + for d in ds.software: + assert d['softwareName'] in ['SoftwareName1'] + assert d['softwareVersion'] in ['SoftwareVersion1'] + assert ds.relatedMaterial == ['RelatedMaterial1', 'RelatedMaterial2'] + assert ds.relatedDatasets == ['RelatedDatasets1', 'RelatedDatasets2'] + assert ds.otherReferences == ['OtherReferences1', 'OtherReferences2'] + assert ds.dataSources == ['DataSources1', 'DataSources2'] + assert ds.originOfSources == 'OriginOfSources' + assert ds.characteristicOfSources == 'CharacteristicOfSourcesNoted' + assert ds.accessToSources == 'DocumentationAndAccessToSources' + + """geospatial""" + assert ds.geospatial_displayName == 'Geospatial Metadata' + assert isinstance(ds.geographicCoverage, list) + assert len(ds.geographicCoverage) == 1 + for d in ds.geographicCoverage: + assert d['country'] in ['Afghanistan'] + assert d['state'] in ['GeographicCoverageStateProvince1'] + assert d['city'] in ['GeographicCoverageCity1'] + assert d['otherGeographicCoverage'] in ['GeographicCoverageOther1'] + assert ds.geographicUnit == ['GeographicUnit1', 'GeographicUnit2'] + assert isinstance(ds.geographicBoundingBox, list) + assert len(ds.geographicBoundingBox) == 1 + for d in ds.geographicBoundingBox: + assert d['westLongitude'] in ['10'] + assert d['eastLongitude'] in ['20'] + assert d['northLongitude'] in ['30'] + assert d['southLongitude'] in ['40'] + + """socialscience""" + assert ds.socialscience_displayName == 'Social Science and Humanities Metadata' + assert ds.unitOfAnalysis == ['UnitOfAnalysis1', 'UnitOfAnalysis2'] + assert ds.universe == ['Universe1', 'Universe2'] + assert ds.timeMethod == 'TimeMethod' + assert ds.dataCollector == 'LastDataCollector1, FirstDataCollector1' + assert ds.collectorTraining == 'CollectorTraining' + assert ds.frequencyOfDataCollection == 'Frequency' + assert ds.samplingProcedure == 'SamplingProcedure' + assert ds.targetSampleActualSize == '100' + assert ds.targetSampleSizeFormula == 'TargetSampleSizeFormula' + assert ds.deviationsFromSampleDesign == 'MajorDeviationsForSampleDesign' + assert ds.collectionMode == 'CollectionMode' + assert ds.researchInstrument == 'TypeOfResearchInstrument' + assert ds.dataCollectionSituation == 'CharacteristicsOfDataCollectionSituation' + assert ds.actionsToMinimizeLoss == 'ActionsToMinimizeLosses' + assert ds.controlOperations == 'ControlOperations' + assert ds.weighting == 'Weighting' + assert ds.cleaningOperations == 'CleaningOperations' + assert ds.datasetLevelErrorNotes == 'StudyLevelErrorNotes' + assert ds.responseRate == 'ResponseRate' + assert ds.samplingErrorEstimates == 'EstimatesOfSamplingError' + assert ds.otherDataAppraisal == 'OtherFormsOfDataAppraisal' + assert ds.socialScienceNotesType == 'NotesType' + assert ds.socialScienceNotesSubject == 'NotesSubject' + 
assert ds.socialScienceNotesText == 'NotesText' + + """journal""" + assert ds.journal_displayName == 'Journal Metadata' + assert isinstance(ds.journalVolumeIssue, list) + assert len(ds.journalVolumeIssue) == 1 + for d in ds.journalVolumeIssue: + assert d['journalVolume'] in ['JournalVolume1'] + assert d['journalIssue'] in ['JournalIssue1'] + assert d['journalPubDate'] in ['1008-01-01'] + assert ds.journalArticleType == 'abstract' def test_dataset_import_metadata_wrong(self): pass diff --git a/tests/test_models_dataverse.py b/tests/test_models_dataverse.py index 3148717..b784635 100644 --- a/tests/test_models_dataverse.py +++ b/tests/test_models_dataverse.py @@ -1,7 +1,9 @@ # coding: utf-8 -import pytest +import os from pyDataverse.models import Dataverse +TEST_DIR = os.path.dirname(os.path.realpath(__file__)) + class TestDataverse(object): """Test the Api() class initalization.""" @@ -16,14 +18,14 @@ def test_dataverse_init(self): assert not dv.pid assert not dv.name assert not dv.alias - assert isinstance(dv.contactEmail, list) - assert not dv.contactEmail + assert isinstance(dv.dataverseContacts, list) + assert not dv.dataverseContacts assert not dv.affiliation assert not dv.description assert not dv.dataverseType def test_dataverse_set_dv_up(self, read_json): - data = read_json('data/dataverse_minimum_1.json') + data = read_json(TEST_DIR + '/data/dataverse_minimum_1.json') dv = Dataverse() dv.set(data) @@ -34,13 +36,11 @@ def test_dataverse_set_dv_up(self, read_json): assert not dv.pid assert dv.alias == 'test-pyDataverse' assert dv.name == 'Test pyDataverse' - assert isinstance(dv.dataverseContacts, list) assert len(dv.dataverseContacts) == 1 assert dv.dataverseContact[0]['contactEmail'] == 'info@aussda.at' - - def test_dataverse_is_valid(self): - data = read_json('data/dataverse_minimum_1.json') + def test_dataverse_is_valid(self, read_json): + data = read_json(TEST_DIR + '/data/dataverse_minimum_1.json') dv = Dataverse() dv.set(data) @@ -56,8 +56,8 @@ def test_dataverse_is_valid(self): assert dv.dataverseContact[0]['contactEmail'] == 'info@aussda.at' assert dv.is_valid() - def test_dataverse_is_valid_not(self): - data = read_json('data/dataverse_minimum_1.json') + def test_dataverse_is_valid_not(self, read_json): + data = read_json(TEST_DIR + '/data/dataverse_minimum_1.json') dv = Dataverse() dv.set(data) dv.name = None @@ -76,7 +76,7 @@ def test_dataverse_is_valid_not(self): def test_dataverse_import_metadata_dv_up(self): dv = Dataverse() - dv.import_metadata('data/dataverse_minimum_1.json') + dv.import_metadata(TEST_DIR + '/data/dataverse_minimum_1.json') assert isinstance(dv.datasets, list) assert not dv.datasets @@ -91,8 +91,8 @@ def test_dataverse_import_metadata_dv_up(self): def test_dataverse_import_metadata_wrong(self): dv = Dataverse() - dv.import_metadata('data/dataverse_minimum_1.json', 'wrong_data-format') - + dv.import_metadata(TEST_DIR + '/data/dataverse_minimum_1.json', 'wrong_data-format') + assert isinstance(dv.datasets, list) assert not dv.datasets assert isinstance(dv.dataverses, list) @@ -100,8 +100,8 @@ def test_dataverse_import_metadata_wrong(self): assert not dv.pid assert not dv.name assert not dv.alias - assert isinstance(dv.contactEmail, list) - assert not dv.contactEmail + assert isinstance(dv.dataverseContacts, list) + assert not dv.dataverseContacts assert not dv.affiliation assert not dv.description assert not dv.dataverseType From c498a9d6f0e58d1020f4dce9670a0f52ab78e607 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 10 Jun 2019 01:05:21 
+0200 Subject: [PATCH 24/46] add tests for Dataverse() and Datasets() --- src/pyDataverse/models.py | 21 +- tests/conftest.py | 38 ++- ...{dataset_minimum.json => dataset_min.json} | 0 ...erse_minimum_1.json => dataverse_min.json} | 0 tests/data/dataverse_minimum_2.json | 9 - tests/data/dataverse_minimum_3.json | 9 - tests/test_api.py | 47 ++- tests/test_models_dataset.py | 270 +++++++++++++++--- tests/test_models_dataverse.py | 161 ++++++----- 9 files changed, 423 insertions(+), 132 deletions(-) rename tests/data/{dataset_minimum.json => dataset_min.json} (100%) rename tests/data/{dataverse_minimum_1.json => dataverse_min.json} (100%) delete mode 100644 tests/data/dataverse_minimum_2.json delete mode 100644 tests/data/dataverse_minimum_3.json diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py index 708d09b..8f3f966 100644 --- a/src/pyDataverse/models.py +++ b/src/pyDataverse/models.py @@ -35,12 +35,12 @@ class Dataverse(object): 'name', 'affiliation', 'description', - 'dataverseContacts' + 'dataverseContacts', 'dataverseType' ] __attr_misc = [ - 'datasets', - 'dataverses', + # 'datasets', + # 'dataverses', 'pid' ] @@ -190,9 +190,17 @@ def json(self, format='dv_up'): """ if format == 'dv_up': - return dict_to_json(self.dict()) + data = self.dict('dv_up') + if data: + return dict_to_json(data) + else: + return None elif format == 'all': - return dict_to_json(self.dict('all')) + data = self.dict('all') + if data: + return dict_to_json(data) + else: + return None else: # TODO Exception print('data format not valid.') @@ -430,8 +438,7 @@ def set(self, data): """ for key, val in data.items(): - if key: - self.__setattr__(key, val) + self.__setattr__(key, val) def is_valid(self): """Check if metadata stored in attributes is valid for dataverse api upload. 
diff --git a/tests/conftest.py b/tests/conftest.py index 2d02cd3..d462efe 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,8 @@ from pyDataverse.api import Api import pytest +TEST_DIR = os.path.dirname(os.path.realpath(__file__)) + @pytest.fixture(scope='module') def api_connection(): @@ -11,34 +13,54 @@ def api_connection(): return Api(base_url, api_token) -@pytest.fixture def read_json(filename): - j2d(read_file(filename, 'r')) + return j2d(read_file(filename)) -@pytest.fixture def read_file(filename): with open(filename, 'r') as f: data = f.read() return data -@pytest.fixture def write_file(filename, data): with open(filename, 'w') as f: f.write(data) -@pytest.fixture def write_json(filename, data): write_file(filename, d2j(data)) -@pytest.fixture def j2d(data): return json.loads(data) -@pytest.fixture def d2j(data): - json.dumps(data, ensure_ascii=False, indent=2) + return json.dumps(data, ensure_ascii=False, indent=2) + + +@pytest.fixture +def import_dict(): + data = { + 'license': 'CC0', + 'termsOfUse': 'CC0 Waiver', + 'termsOfAccess': 'Terms of Access', + 'citation_displayName': 'Citation Metadata', + 'title': 'Replication Data for: Title' + } + return data + +@pytest.fixture +def import_dataset_full(): + return read_json(TEST_DIR + '/data/dataset_full.json') + + +@pytest.fixture +def import_dataset_min(): + return read_json(TEST_DIR + '/data/dataset_min.json') + + +@pytest.fixture +def import_dataverse_min(): + return read_json(TEST_DIR + '/data/dataverse_min.json') diff --git a/tests/data/dataset_minimum.json b/tests/data/dataset_min.json similarity index 100% rename from tests/data/dataset_minimum.json rename to tests/data/dataset_min.json diff --git a/tests/data/dataverse_minimum_1.json b/tests/data/dataverse_min.json similarity index 100% rename from tests/data/dataverse_minimum_1.json rename to tests/data/dataverse_min.json diff --git a/tests/data/dataverse_minimum_2.json b/tests/data/dataverse_minimum_2.json deleted file mode 100644 index 5439c55..0000000 --- a/tests/data/dataverse_minimum_2.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "alias": "test-pyDataverse-2", - "name": "Test pyDataverse 2", - "dataverseContacts": [ - { - "contactEmail": "info@aussda.at" - } - ] -} diff --git a/tests/data/dataverse_minimum_3.json b/tests/data/dataverse_minimum_3.json deleted file mode 100644 index 45d891b..0000000 --- a/tests/data/dataverse_minimum_3.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "alias": "test-pyDataverse-3", - "name": "Test pyDataverse 3", - "dataverseContacts": [ - { - "contactEmail": "info@aussda.at" - } - ] -} diff --git a/tests/test_api.py b/tests/test_api.py index 7000722..8b14bbc 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,10 +1,10 @@ # coding: utf-8 from datetime import datetime -from datetime import timedelta import os from pyDataverse.api import Api from pyDataverse.exceptions import ApiResponseError from pyDataverse.exceptions import ApiUrlError +from pyDataverse.utils import dict_to_json import pytest from requests import Response from time import sleep @@ -65,6 +65,51 @@ def setup_class(cls): """Create the api connection for later use.""" cls.dataverse_id = 'test-pyDataverse' + def test_create_dataverse(self, import_dataverse_min, api_connection): + """Test successfull `.create_dataverse()` request`.""" + if not os.environ.get('TRAVIS'): + api = api_connection + metadata = import_dataverse_min + resp = api.create_dataverse( + self.dataverse_id, dict_to_json(metadata)) + sleep(SLEEP_TIME) + assert isinstance(resp, Response) + assert 
api.get_dataverse(self.dataverse_id).json() + + def test_create_dataset(self, import_dataset_min, api_connection): + """Test successful `.create_dataset()` request.""" + if not os.environ.get('TRAVIS'): + api = api_connection + metadata = import_dataset_min + resp = api.create_dataset(':root', dict_to_json(metadata)) + sleep(SLEEP_TIME) + TestApiRequests.dataset_id = resp.json()['data']['persistentId'] + assert isinstance(resp, Response) + + def test_get_dataset(self, api_connection): + """Test successful `.get_dataset()` request.""" + if not os.environ.get('TRAVIS'): + api = api_connection + resp = api.get_dataset(TestApiRequests.dataset_id) + sleep(SLEEP_TIME) + assert isinstance(resp, Response) + + def test_delete_dataset(self, api_connection): + """Test successful `.delete_dataset()` request.""" + if not os.environ.get('TRAVIS'): + api = api_connection + resp = api.delete_dataset(TestApiRequests.dataset_id) + sleep(SLEEP_TIME) + assert isinstance(resp, Response) + + def test_delete_dataverse(self, api_connection): + """Test successful `.delete_dataverse()` request.""" + if not os.environ.get('TRAVIS'): + api = api_connection + resp = api.delete_dataverse(self.dataverse_id) + sleep(SLEEP_TIME) + assert isinstance(resp, Response) + def test_get_request(self, api_connection): """Test successful `.get_request()` request.""" # TODO: test params and auth default diff --git a/tests/test_models_dataset.py b/tests/test_models_dataset.py index fa79704..582b299 100644 --- a/tests/test_models_dataset.py +++ b/tests/test_models_dataset.py @@ -1,6 +1,7 @@ # coding: utf-8 import os from pyDataverse.models import Dataset +from pyDataverse.models import Dataverse TEST_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -9,22 +10,129 @@ class TestDataset(object): """Test the Dataset() class initialization.""" def test_dataset_init(self): - pass + ds = Dataset() + + assert isinstance(ds.datafiles, list) + assert len(ds.datafiles) == 0 + + """Metadata: dataset""" + assert not ds.license + assert not ds.termsOfUse + assert not ds.termsOfAccess + + """Metadata: citation""" + assert not ds.citation_displayName + assert not ds.title + assert not ds.subtitle + assert not ds.alternativeTitle + assert not ds.alternativeURL + assert isinstance(ds.otherId, list) + assert len(ds.otherId) == 0 + assert isinstance(ds.author, list) + assert len(ds.author) == 0 + assert isinstance(ds.datasetContact, list) + assert len(ds.datasetContact) == 0 + assert isinstance(ds.dsDescription, list) + assert len(ds.dsDescription) == 0 + assert isinstance(ds.subject, list) + assert len(ds.subject) == 0 + assert isinstance(ds.subject, list) + assert len(ds.subject) == 0 + assert isinstance(ds.topicClassification, list) + assert len(ds.topicClassification) == 0 + assert isinstance(ds.publication, list) + assert len(ds.publication) == 0 + assert not ds.notesText + assert isinstance(ds.producer, list) + assert len(ds.producer) == 0 + assert not ds.productionDate + assert not ds.productionPlace + assert isinstance(ds.contributor, list) + assert len(ds.contributor) == 0 + assert isinstance(ds.grantNumber, list) + assert len(ds.grantNumber) == 0 + assert isinstance(ds.distributor, list) + assert len(ds.distributor) == 0 + assert not ds.distributionDate + assert not ds.depositor + assert not ds.dateOfDeposit + assert isinstance(ds.timePeriodCovered, list) + assert len(ds.timePeriodCovered) == 0 + assert isinstance(ds.dateOfCollection, list) + assert len(ds.dateOfCollection) == 0 + assert isinstance(ds.kindOfData, list) + assert 
len(ds.kindOfData) == 0 + assert not ds.seriesName + assert not ds.seriesInformation + assert isinstance(ds.software, list) + assert len(ds.software) == 0 + assert isinstance(ds.relatedMaterial, list) + assert len(ds.relatedMaterial) == 0 + assert isinstance(ds.relatedDatasets, list) + assert len(ds.relatedDatasets) == 0 + assert isinstance(ds.otherReferences, list) + assert len(ds.otherReferences) == 0 + assert isinstance(ds.dataSources, list) + assert len(ds.dataSources) == 0 + assert not ds.originOfSources + assert not ds.characteristicOfSources + assert not ds.accessToSources - def test_dataset_set_dvup(self): - pass + """Metadata: geospatial""" + assert not ds.geospatial_displayName + assert isinstance(ds.geographicCoverage, list) + assert len(ds.geographicCoverage) == 0 + assert not ds.geographicUnit + assert isinstance(ds.geographicBoundingBox, list) + assert len(ds.geographicBoundingBox) == 0 - def test_dataset_set_dvup_less(self): - pass + """Metadata: socialscience""" + assert not ds.socialscience_displayName + assert isinstance(ds.unitOfAnalysis, list) + assert len(ds.unitOfAnalysis) == 0 + assert isinstance(ds.universe, list) + assert len(ds.universe) == 0 + assert not ds.timeMethod + assert not ds.dataCollector + assert not ds.collectorTraining + assert not ds.frequencyOfDataCollection + assert not ds.samplingProcedure + assert not ds.targetSampleActualSize + assert not ds.targetSampleSizeFormula + assert not ds.socialScienceNotesType + assert not ds.socialScienceNotesSubject + assert not ds.socialScienceNotesText + assert not ds.deviationsFromSampleDesign + assert not ds.collectionMode + assert not ds.researchInstrument + assert not ds.dataCollectionSituation + assert not ds.actionsToMinimizeLoss + assert not ds.controlOperations + assert not ds.weighting + assert not ds.cleaningOperations + assert not ds.datasetLevelErrorNotes + assert not ds.responseRate + assert not ds.samplingErrorEstimates + assert not ds.otherDataAppraisal - def test_dataset_set_dvup_more(self): - pass + """Metadata: journal""" + assert not ds.journal_displayName + assert isinstance(ds.journalVolumeIssue, list) + assert len(ds.journalVolumeIssue) == 0 + assert not ds.journalArticleType - def test_dataset_is_valid(self): - pass + def test_dataset_is_valid_valid(self, import_dataset_full): + ds = Dataset() + ds.import_metadata(TEST_DIR + '/data/dataset_full.json') + + assert ds.is_valid() + + def test_dataset_is_valid_valid_not(self, import_dataset_full): + ds = Dataset() + ds.import_metadata(TEST_DIR + '/data/dataset_full.json') + ds.title = None - def test_dataset_is_valid_not(self): - pass + assert not ds.is_valid() def test_dataset_import_metadata_dv_up(self): ds = Dataset() @@ -197,35 +305,129 @@ def test_dataset_import_metadata_dv_up(self): assert d['journalPubDate'] in ['1008-01-01'] assert ds.journalArticleType == 'abstract' - def test_dataset_import_metadata_wrong(self): - pass - - def test_dataset_dict_dv_up_valid_minimum(self): - pass + def test_dataset_set_dv_up(self, import_dict): + ds = Dataset() + data = import_dict + ds.set(data) - def test_dataset_dict_dv_up_valid_full(self): - pass + """dataset""" + assert ds.license == 'CC0' + assert ds.termsOfUse == 'CC0 Waiver' + assert ds.termsOfAccess == 'Terms of Access' - def test_dataset_dict_dv_up_valid_not(self): - pass + """citation""" + assert ds.citation_displayName == 'Citation Metadata' + assert ds.title == 'Replication Data for: Title' - def test_dataset_dict_all(self): - pass + def test_dataset_import_metadata_format_wrong(self): + ds = 
Dataset() + ds.import_metadata(TEST_DIR + '/data/dataset_full.json', 'wrong_data-format') - def test_dataset_dict_wrong(self): - pass + assert isinstance(ds.datafiles, list) + assert len(ds.datafiles) == 0 - def test_dataset_json_dv_up(self): - pass + """Metadata: dataset""" + assert not ds.license + assert not ds.termsOfUse + assert not ds.termsOfAccess - def test_dataset_json_all(self): - pass + """Metadata: citation""" + assert not ds.citation_displayName + assert not ds.title + assert not ds.subtitle + assert not ds.alternativeTitle + assert not ds.alternativeURL + assert isinstance(ds.otherId, list) + assert len(ds.otherId) == 0 + assert isinstance(ds.author, list) + assert len(ds.author) == 0 + assert isinstance(ds.datasetContact, list) + assert len(ds.datasetContact) == 0 + assert isinstance(ds.dsDescription, list) + assert len(ds.dsDescription) == 0 + assert isinstance(ds.subject, list) + assert len(ds.subject) == 0 + assert isinstance(ds.subject, list) + assert len(ds.subject) == 0 + assert isinstance(ds.topicClassification, list) + assert len(ds.topicClassification) == 0 + assert isinstance(ds.publication, list) + assert len(ds.publication) == 0 + assert not ds.notesText + assert isinstance(ds.producer, list) + assert len(ds.producer) == 0 + assert not ds.productionDate + assert not ds.productionPlace + assert isinstance(ds.contributor, list) + assert len(ds.contributor) == 0 + assert isinstance(ds.grantNumber, list) + assert len(ds.grantNumber) == 0 + assert isinstance(ds.distributor, list) + assert len(ds.distributor) == 0 + assert not ds.distributionDate + assert not ds.depositor + assert not ds.dateOfDeposit + assert isinstance(ds.timePeriodCovered, list) + assert len(ds.timePeriodCovered) == 0 + assert isinstance(ds.dateOfCollection, list) + assert len(ds.dateOfCollection) == 0 + assert isinstance(ds.kindOfData, list) + assert len(ds.kindOfData) == 0 + assert not ds.seriesName + assert not ds.seriesInformation + assert isinstance(ds.software, list) + assert len(ds.software) == 0 + assert isinstance(ds.relatedMaterial, list) + assert len(ds.relatedMaterial) == 0 + assert isinstance(ds.relatedDatasets, list) + assert len(ds.relatedDatasets) == 0 + assert isinstance(ds.otherReferences, list) + assert len(ds.otherReferences) == 0 + assert isinstance(ds.dataSources, list) + assert len(ds.dataSources) == 0 + assert not ds.originOfSources + assert not ds.characteristicOfSources + assert not ds.accessToSources - def test_dataset_json_wrong(self): - pass + """Metadata: geospatial""" + assert not ds.geospatial_displayName + assert isinstance(ds.geographicCoverage, list) + assert len(ds.geographicCoverage) == 0 + assert not ds.geographicUnit + assert isinstance(ds.geographicBoundingBox, list) + assert len(ds.geographicBoundingBox) == 0 - def test_dataset_export_metadata_dv_up(self): - pass + """Metadata: socialscience""" + assert not ds.socialscience_displayName + assert isinstance(ds.unitOfAnalysis, list) + assert len(ds.unitOfAnalysis) == 0 + assert isinstance(ds.universe, list) + assert len(ds.universe) == 0 + assert not ds.timeMethod + assert not ds.dataCollector + assert not ds.collectorTraining + assert not ds.frequencyOfDataCollection + assert not ds.samplingProcedure + assert not ds.targetSampleActualSize + assert not ds.targetSampleSizeFormula + assert not ds.socialScienceNotesType + assert not ds.socialScienceNotesSubject + assert not ds.socialScienceNotesText + assert not ds.deviationsFromSampleDesign + assert not ds.collectionMode + assert not ds.researchInstrument + assert 
not ds.dataCollectionSituation + assert not ds.actionsToMinimizeLoss + assert not ds.controlOperations + assert not ds.weighting + assert not ds.cleaningOperations + assert not ds.datasetLevelErrorNotes + assert not ds.responseRate + assert not ds.samplingErrorEstimates + assert not ds.otherDataAppraisal - def test_dataset_export_metadata_wrong(self): - pass + """Metadata: journal""" + assert not ds.journal_displayName + assert isinstance(ds.journalVolumeIssue, list) + assert len(ds.journalVolumeIssue) == 0 + assert not ds.journalArticleType diff --git a/tests/test_models_dataverse.py b/tests/test_models_dataverse.py index b784635..919b911 100644 --- a/tests/test_models_dataverse.py +++ b/tests/test_models_dataverse.py @@ -1,5 +1,6 @@ # coding: utf-8 import os +from pyDataverse.models import Dataset from pyDataverse.models import Dataverse TEST_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -12,20 +13,20 @@ def test_dataverse_init(self): dv = Dataverse() assert isinstance(dv.datasets, list) - assert not dv.datasets + assert len(dv.datasets) == 0 assert isinstance(dv.dataverses, list) - assert not dv.dataverses + assert len(dv.dataverses) == 0 assert not dv.pid assert not dv.name assert not dv.alias assert isinstance(dv.dataverseContacts, list) - assert not dv.dataverseContacts + assert len(dv.dataverseContacts) == 0 assert not dv.affiliation assert not dv.description assert not dv.dataverseType - def test_dataverse_set_dv_up(self, read_json): - data = read_json(TEST_DIR + '/data/dataverse_minimum_1.json') + def test_dataverse_set_dv_up(self, import_dataverse_min): + data = import_dataverse_min dv = Dataverse() dv.set(data) @@ -37,46 +38,105 @@ def test_dataverse_set_dv_up(self, read_json): assert dv.alias == 'test-pyDataverse' assert dv.name == 'Test pyDataverse' assert len(dv.dataverseContacts) == 1 - assert dv.dataverseContact[0]['contactEmail'] == 'info@aussda.at' + assert dv.dataverseContacts[0]['contactEmail'] == 'info@aussda.at' - def test_dataverse_is_valid(self, read_json): - data = read_json(TEST_DIR + '/data/dataverse_minimum_1.json') + def test_dataverse_is_valid_valid(self, import_dataverse_min): + data = import_dataverse_min dv = Dataverse() dv.set(data) - assert isinstance(dv.datasets, list) - assert not dv.datasets - assert isinstance(dv.dataverses, list) - assert not dv.dataverses - assert not dv.pid - assert dv.alias == 'test-pyDataverse' - assert dv.name == 'Test pyDataverse' - assert isinstance(dv.dataverseContacts, list) - assert len(dv.dataverseContacts) == 1 - assert dv.dataverseContact[0]['contactEmail'] == 'info@aussda.at' assert dv.is_valid() - def test_dataverse_is_valid_not(self, read_json): - data = read_json(TEST_DIR + '/data/dataverse_minimum_1.json') + def test_dataverse_is_valid_not(self, import_dataverse_min): + data = import_dataverse_min dv = Dataverse() dv.set(data) dv.name = None - assert not dv.is_valid() - assert isinstance(dv.datasets, list) - assert not dv.datasets - assert isinstance(dv.dataverses, list) - assert not dv.dataverses - assert not dv.pid - assert dv.alias == 'test-pyDataverse' assert not dv.name - assert isinstance(dv.dataverseContacts, list) - assert len(dv.dataverseContacts) == 1 - assert dv.dataverseContact[0]['contactEmail'] == 'info@aussda.at' + assert not dv.is_valid() + + def test_dataverse_dict_dv_up_valid(self, import_dataverse_min): + data = import_dataverse_min + dv = Dataverse() + dv.set(data) + + assert dv.dict() + assert isinstance(dv.dict(), dict) + + def test_dataverse_dict_valid_all(self, import_dataverse_min): + 
data = import_dataverse_min + dv = Dataverse() + dv.set(data) + dv.datasets = [Dataset()] + dv.dataverses = [Dataverse()] + dv.pid = 'doi:10.11587/EVMUHP' + data = dv.dict('all') + + assert data + assert isinstance(data, dict) + assert data['alias'] == 'test-pyDataverse' + assert data['name'] == 'Test pyDataverse' + assert data['dataverseContacts'][0]['contactEmail'] == 'info@aussda.at' + assert data['pid'] == 'doi:10.11587/EVMUHP' + + def test_dataverse_dict_format_wrong(self, import_dataverse_min): + data = import_dataverse_min + dv = Dataverse() + dv.set(data) + + assert not dv.dict('wrong') + + def test_dataverse_dict_dv_up_valid_not(self, import_dataverse_min): + data = import_dataverse_min + dv = Dataverse() + dv.set(data) + dv.name = None + + assert not dv.dict() + + def test_dataverse_json_dv_up_valid(self, import_dataverse_min): + data = import_dataverse_min + dv = Dataverse() + dv.set(data) + + assert dv.json() + assert isinstance(dv.json(), str) + + def test_dataverse_json_dv_up_valid_not(self, import_dataverse_min): + data = import_dataverse_min + dv = Dataverse() + dv.set(data) + dv.name = None + + assert not dv.json() + + def test_dataverse_json_valid_all(self, import_dataverse_min): + data = import_dataverse_min + dv = Dataverse() + dv.set(data) + dv.datasets = [Dataset()] + dv.dataverses = [Dataverse()] + dv.pid = 'doi:10.11587/EVMUHP' + data = dv.json('all') + + assert data + assert isinstance(data, str) + + def test_dataverse_json_valid_format_wrong(self, import_dataverse_min): + data = import_dataverse_min + dv = Dataverse() + dv.set(data) + dv.datasets = [Dataset()] + dv.dataverses = [Dataverse()] + dv.pid = 'doi:10.11587/EVMUHP' + data = dv.json('wrong') + + assert not data def test_dataverse_import_metadata_dv_up(self): dv = Dataverse() - dv.import_metadata(TEST_DIR + '/data/dataverse_minimum_1.json') + dv.import_metadata(TEST_DIR + '/data/dataverse_min.json') assert isinstance(dv.datasets, list) assert not dv.datasets @@ -87,51 +147,24 @@ def test_dataverse_import_metadata_dv_up(self): assert dv.name == 'Test pyDataverse' assert isinstance(dv.dataverseContacts, list) assert len(dv.dataverseContacts) == 1 - assert dv.dataverseContact[0]['contactEmail'] == 'info@aussda.at' + assert dv.dataverseContacts[0]['contactEmail'] == 'info@aussda.at' - def test_dataverse_import_metadata_wrong(self): + def test_dataverse_import_metadata_format_wrong(self): dv = Dataverse() - dv.import_metadata(TEST_DIR + '/data/dataverse_minimum_1.json', 'wrong_data-format') + dv.import_metadata(TEST_DIR + '/data/dataverse_min.json', 'wrong_data-format') assert isinstance(dv.datasets, list) + assert len(dv.datasets) == 0 assert not dv.datasets assert isinstance(dv.dataverses, list) + assert len(dv.dataverses) == 0 assert not dv.dataverses assert not dv.pid assert not dv.name assert not dv.alias assert isinstance(dv.dataverseContacts, list) + assert len(dv.dataverseContacts) == 0 assert not dv.dataverseContacts assert not dv.affiliation assert not dv.description assert not dv.dataverseType - - def test_dataverse_dict_dv_up_valid_minimum(self): - pass - - def test_dataverse_dict_dv_up_valid_full(self): - pass - - def test_dataverse_dict_dv_up_valid_not(self): - pass - - def test_dataverse_dict_all(self): - pass - - def test_dataverse_dict_wrong(self): - pass - - def test_dataverse_json_dv_up(self): - pass - - def test_dataverse_json_all(self): - pass - - def test_dataverse_json_wrong(self): - pass - - def test_dataverse_export_metadata_dv_up(self): - pass - - def 
test_dataverse_export_metadata_wrong(self): - pass From 61aeafbc5071143213c0f4251810c9f49627de95 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 10 Jun 2019 01:15:34 +0200 Subject: [PATCH 25/46] fix py27 issue: ensure_ascii=True for dict_to_json --- src/pyDataverse/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pyDataverse/utils.py b/src/pyDataverse/utils.py index e9d92b0..cdd7ee3 100644 --- a/src/pyDataverse/utils.py +++ b/src/pyDataverse/utils.py @@ -46,7 +46,7 @@ def dict_to_json(data): """ try: - return json.dumps(data, ensure_ascii=False, indent=2) + return json.dumps(data, ensure_ascii=True, indent=2) except Exception as e: raise e From 01c448c013c66d8d5342a658d42153b0d6c703f0 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 10 Jun 2019 02:15:32 +0200 Subject: [PATCH 26/46] remove flake8 from travis ci; minor improvements in code style --- .travis.yml | 2 - src/pyDataverse/api.py | 3 +- src/pyDataverse/models.py | 86 ++++++++++++++++++++++++--------------- tox.ini | 6 ++- 4 files changed, 59 insertions(+), 38 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4502701..40a4680 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,8 +20,6 @@ matrix: env: TOXENV=coverage - python: 3.6 env: TOXENV=coveralls - - python: 3.6 - env: TOXENV=flake8 - python: 3.6 env: TOXENV=packaging - python: 3.6 diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index 17cb687..eb6b732 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -711,7 +711,8 @@ def get_datafiles(self, doi, version='1'): """ base_str = '/datasets/:persistentId/versions/' - query_str = base_str+'{0}/files?persistentId={1}'.format(version, doi) + query_str = base_str + '{0}/files?persistentId={1}'.format( + version, doi) resp = self.get_request(query_str) return resp diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py index 8f3f966..57f67b4 100644 --- a/src/pyDataverse/models.py +++ b/src/pyDataverse/models.py @@ -17,9 +17,12 @@ class Dataverse(object): """Base class for the Dataverse model. * data - * dict: dict mit key value pairs übergeben, wo key exakt das attributist. - * optional: list: liste tuples (links key, rechts value) übergeben, wo key exakt das attribut ist. - * does: set metadata functions: dicts mit key-value pairs übergeben. die keys müssen wie die metadata attribute + * dict: dict mit key value pairs übergeben, wo key exakt das + attributist. + * optional: list: liste tuples (links key, rechts value) übergeben, wo + key exakt das attribut ist. + * does: set metadata functions: dicts mit key-value pairs übergeben. die + keys müssen wie die metadata attribute """ @@ -142,7 +145,8 @@ def dict(self, format='dv_up'): format: all, dv_up - if data is valid will be checked here. so it is not necessary anymore in json(). + if data is valid will be checked here. so it is not necessary anymore + in json(). """ data = {} @@ -151,7 +155,7 @@ def dict(self, format='dv_up'): for attr in self.__attr_valid: if self.__getattribute__(attr): data[attr] = self.__getattribute__(attr) - # TODO: prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! + # TODO: prüfen, ob required attributes gesetzt sind = Exception return data else: print('dict can not be created. 
Data is not valid for format') @@ -275,17 +279,25 @@ class Dataset(object): [\'datasetVersion\'][\'metadataBlocks\'][\'citation\'][\'fields\'].""" __attr_citation_fields = { 'otherId': ['otherIdAgency', 'otherIdValue'], - 'author': ['authorName', 'authorAffiliation', 'authorIdentifierScheme', 'authorIdentifier'], - 'datasetContact': ['datasetContactName', 'datasetContactAffiliation', 'datasetContactEmail'], + 'author': ['authorName', 'authorAffiliation', 'authorIdentifierScheme', + 'authorIdentifier'], + 'datasetContact': ['datasetContactName', 'datasetContactAffiliation', + 'datasetContactEmail'], 'dsDescription': ['dsDescriptionValue', 'dsDescriptionDate'], - 'keyword': ['keywordValue', 'keywordVocabulary', 'keywordVocabularyURI'], - 'producer': ['producerName', 'producerAffiliation', 'producerAbbreviation', 'producerURL', 'producerLogoURL'], + 'keyword': ['keywordValue', 'keywordVocabulary', + 'keywordVocabularyURI'], + 'producer': ['producerName', 'producerAffiliation', + 'producerAbbreviation', 'producerURL', 'producerLogoURL'], 'contributor': ['contributorType', 'contributorName'], 'grantNumber': ['grantNumberAgency', 'grantNumberValue'], 'topicClassification': ['topicClassValue', 'topicClassVocab'], - 'publication': ['publicationCitation', 'publicationIDType', 'publicationIDNumber', 'publicationURL'], - 'distributor': ['distributorName', 'distributorAffiliation', 'distributorAbbreviation', 'distributorURL', 'distributorLogoURL'], - 'timePeriodCovered': ['timePeriodCoveredStart', 'timePeriodCoveredEnd'], + 'publication': ['publicationCitation', 'publicationIDType', + 'publicationIDNumber', 'publicationURL'], + 'distributor': ['distributorName', 'distributorAffiliation', + 'distributorAbbreviation', 'distributorURL', + 'distributorLogoURL'], + 'timePeriodCovered': ['timePeriodCoveredStart', + 'timePeriodCoveredEnd'], 'dateOfCollection': ['dateOfCollectionStart', 'dateOfCollectionEnd'], 'software': ['softwareName', 'softwareVersion'] } @@ -299,8 +311,10 @@ class Dataset(object): """Attributes in Dataverse metadata json inside [\'datasetVersion\'][\'metadataBlocks\'][\'geospatial\'][\'fields\'].""" __attr_geospatial_fields = { - 'geographicCoverage': ['country', 'state', 'city', 'otherGeographicCoverage'], - 'geographicBoundingBox': ['westLongitude', 'eastLongitude', 'northLongitude', 'southLongitude'] + 'geographicCoverage': ['country', 'state', 'city', + 'otherGeographicCoverage'], + 'geographicBoundingBox': ['westLongitude', 'eastLongitude', + 'northLongitude', 'southLongitude'] } """Attributes on first level of Dataverse metadata json inside @@ -336,7 +350,8 @@ class Dataset(object): """Attributes in Dataverse metadata json inside [\'datasetVersion\'][\'metadataBlocks\'][\'journal\'][\'fields\'].""" __attr_journal_fields = { - 'journalVolumeIssue': ['journalVolume', 'journalIssue', 'journalPubDate'] + 'journalVolumeIssue': ['journalVolume', 'journalIssue', + 'journalPubDate'] } def __init__(self): @@ -474,14 +489,14 @@ def is_valid(self): if authors: for a in authors: if 'authorAffiliation' in a or 'authorIdentifierScheme' in a or 'authorIdentifier' in a: - if not 'authorName' in a: + if 'authorName' not in a: is_valid = False ds_contac = self.__getattribute__('datasetContact') if ds_contac: for c in ds_contac: if 'datasetContactAffiliation' in c or 'datasetContactEmail' in c: - if not 'datasetContactName' in c: + if 'datasetContactName' not in c: is_valid = False producer = self.__getattribute__('producer') @@ -495,14 +510,14 @@ def is_valid(self): if contributor: for c in 
contributor: if 'contributorType' in c: - if not 'contributorName' in c: + if 'contributorName' not in c: is_valid = False distributor = self.__getattribute__('distributor') if distributor: for d in distributor: if 'distributorAffiliation' in d or 'distributorAbbreviation' in d or 'distributorURL' in d or 'distributorLogoURL' in d: - if not 'distributorName' in d: + if 'distributorName' not in d: is_valid = False bbox = self.__getattribute__('geographicBoundingBox') @@ -565,7 +580,8 @@ def import_metadata(self, filename, format='dv_up'): if 'geospatial' in metadata['datasetVersion']['metadataBlocks']: geospatial = metadata['datasetVersion']['metadataBlocks']['geospatial'] if 'displayName' in geospatial: - self.__setattr__('geospatial_displayName', geospatial['displayName']) + self.__setattr__('geospatial_displayName', + geospatial['displayName']) for field in geospatial['fields']: if field['typeName'] in self.__attr_geospatial: @@ -583,7 +599,8 @@ def import_metadata(self, filename, format='dv_up'): if 'socialscience' in metadata['datasetVersion']['metadataBlocks']: socialscience = metadata['datasetVersion']['metadataBlocks']['socialscience'] if 'displayName' in socialscience: - self.__setattr__('socialscience_displayName', socialscience['displayName']) + self.__setattr__('socialscience_displayName', + socialscience['displayName']) for field in socialscience['fields']: if field['typeName'] in self.__attr_socialscience: @@ -610,7 +627,8 @@ def import_metadata(self, filename, format='dv_up'): if 'journal' in metadata['datasetVersion']['metadataBlocks']: journal = metadata['datasetVersion']['metadataBlocks']['journal'] if 'displayName' in journal: - self.__setattr__('journal_displayName', journal['displayName']) + self.__setattr__('journal_displayName', + journal['displayName']) for field in journal['fields']: if field['typeName'] in self.__attr_journal: @@ -690,14 +708,14 @@ def dict(self, format='dv_up'): citation['fields'].append({ 'typeName': attr, 'value': self.__getattribute__(attr) - }) + }) # Generate fields attributes for key, val in self.__attr_citation_fields.items(): citation['fields'].append({ 'typeName': key, 'value': self.__generate_dicts(key, val) - }) + }) # Generate series attributes if self.__getattribute__('series'): @@ -714,7 +732,7 @@ def dict(self, format='dv_up'): citation['fields'].append({ 'typeName': 'series', 'value': tmp_dict - }) + }) """geospatial""" # Generate first level attributes @@ -722,7 +740,7 @@ def dict(self, format='dv_up'): geospatial['fields'].append({ 'typeName': attr, 'value': self.__getattribute__(attr) - }) + }) # Generate fields attributes for key, val in self.__attr_geospatial_fields.items(): @@ -730,7 +748,7 @@ def dict(self, format='dv_up'): geospatial['fields'].append({ 'typeName': key, 'value': self.__generate_dicts(key, val) - }) + }) """socialscience""" # Generate first level attributes @@ -738,7 +756,7 @@ def dict(self, format='dv_up'): socialscience['fields'].append({ 'typeName': attr, 'value': self.__getattribute__(attr) - }) + }) # Generate targetSampleSize attributes if self.__getattribute__('targetSampleSize'): @@ -755,7 +773,7 @@ def dict(self, format='dv_up'): socialscience['fields'].append({ 'typeName': 'series', 'value': tmp_dict - }) + }) # Generate socialScienceNotes attributes if self.__getattribute__('socialScienceNotes'): @@ -776,7 +794,7 @@ def dict(self, format='dv_up'): socialscience['fields'].append({ 'typeName': 'series', 'value': tmp_dict - }) + }) """journal""" # Generate first level attributes @@ -784,14 +802,14 @@ 
def dict(self, format='dv_up'): journal['fields'].append({ 'typeName': attr, 'value': self.__getattribute__(attr) - }) + }) # Generate fields attributes for key, val in self.__attr_journal_fields.items(): journal['fields'].append({ 'typeName': key, 'value': self.__generate_dicts(key, val) - }) + }) # TODO: prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! data['datasetVersion']['metadataBlocks']['citation'] = citation @@ -805,7 +823,9 @@ def dict(self, format='dv_up'): return None elif format == 'all': attr_lst = self.__attr_datasetVersion + self.__attr_citation + self.__attr_geospatial + self.__attr_socialscience + self.__attr_journal - for key, val in self.__attr_citation_fields.update(self.__attr_geospatial_fields.update(self.__attr_journal_fields)): + for key, val in self.__attr_citation_fields.update( + self.__attr_geospatial_fields.update( + self.__attr_journal_fields)): attr_lst.append(key) for attr in attr_lst: if self.__getattribute__(attr): diff --git a/tox.ini b/tox.ini index 6c86f05..584500a 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py36,coverage,coveralls,docs,flake8,packaging,dist +envlist = py35,coverage,coveralls,docs,packaging,dist skip_missing_interpreters = True ignore_basepython_conflict = True @@ -8,7 +8,7 @@ description = default settings for unspecified tests usedevelop = False skip_install = False passenv = * -basepython = python3.6 +basepython = python3.5 [testenv:py27] deps = @@ -55,6 +55,8 @@ commands = pytest tests/ --doctest-modules -v --cov=src/pyDataverse [flake8] +max-line-length = 80 +ignore = E129 exclude = src/pyDataverse/docs/source/conf.py .tox From 3b1277f9a4a1236269747d138298ff9da12b531e Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 10 Jun 2019 21:26:35 +0200 Subject: [PATCH 27/46] update docstrings in Api() --- src/pyDataverse/api.py | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index eb6b732..d4ccc04 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -1,6 +1,6 @@ # !/usr/bin/env python # -*- coding: utf-8 -*- -"""Find out more at https://github.com/AUSSDA/pyDataverse.""" +"""Dataverse API connector.""" from datetime import datetime import json from pyDataverse.exceptions import ApiAuthorizationError @@ -17,11 +17,6 @@ import subprocess as sp -""" -Connect and request the Dataverse API Endpoints. Save and use request results. -""" - - class Api(object): """API class. @@ -45,23 +40,33 @@ class Api(object): api_token api_version - Example - ---------- - Create an Api connection:: - - >>> base_url = 'http://demo.dataverse.org' - >>> api = Api(base_url) - >>> api.status - 'OK' - """ def __init__(self, base_url, api_token=None, api_version='v1'): - """Init an `Api()` class. + """Init an Api() class. Scheme, host and path combined create the base-url for the api. See more about url at https://en.wikipedia.org/wiki/URL + Parameters + ---------- + base_url : string + Base url for Dataverse api. + api_token : string + Api token for Dataverse api. + api_version : string + Api version of Dataverse native api. Default is `v1`. + + Examples + ------- + Create an Api connection:: + + >>> from pyDataverse.api import Api + >>> base_url = 'http://demo.dataverse.org' + >>> api = Api(base_url) + >>> api.status + 'OK' + """ # Check and set basic variables. 
if not isinstance(base_url, ("".__class__, u"".__class__)): From dc9706e4c6f9c708a080f906e3ad0b598780f5c8 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 10 Jun 2019 21:27:59 +0200 Subject: [PATCH 28/46] update docstrings in utils.py --- src/pyDataverse/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pyDataverse/utils.py b/src/pyDataverse/utils.py index cdd7ee3..0fb7bd3 100644 --- a/src/pyDataverse/utils.py +++ b/src/pyDataverse/utils.py @@ -1,6 +1,6 @@ # !/usr/bin/env python # -*- coding: utf-8 -*- -"""Find out more at https://github.com/AUSSDA/pyDataverse.""" +"""Dataverse utility functions.""" import csv import json From 1650a233f6c5b0e961e9ddbd6465325fba8d83cb Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 10 Jun 2019 21:30:37 +0200 Subject: [PATCH 29/46] update docstrings in models.py(); fix issues appearing through first tests in models.py --- src/pyDataverse/models.py | 1121 +++++++++++++++++++++++++------------ 1 file changed, 775 insertions(+), 346 deletions(-) diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py index 57f67b4..084723a 100644 --- a/src/pyDataverse/models.py +++ b/src/pyDataverse/models.py @@ -1,6 +1,6 @@ # !/usr/bin/env python # -*- coding: utf-8 -*- -"""Find out more at https://github.com/AUSSDA/pyDataverse.""" +"""Dataverse data-types data model.""" from __future__ import absolute_import from pyDataverse.utils import dict_to_json from pyDataverse.utils import read_file_json @@ -14,26 +14,16 @@ class Dataverse(object): - """Base class for the Dataverse model. + """Base class for Dataverse data model.""" - * data - * dict: dict mit key value pairs übergeben, wo key exakt das - attributist. - * optional: list: liste tuples (links key, rechts value) übergeben, wo - key exakt das attribut ist. - * does: set metadata functions: dicts mit key-value pairs übergeben. die - keys müssen wie die metadata attribute - - """ - - """Attributes required to Dataverse metadata json.""" - __attr_required = [ + """Attributes required for Dataverse metadata json.""" + __attr_required_metadata = [ 'alias', 'name', 'dataverseContacts' ] - """Attributes on first level of Dataverse metadata json.""" - __attr_valid = [ + """Attributes valid for Dataverse metadata json.""" + __attr_valid_metadata = [ 'alias', 'name', 'affiliation', @@ -41,14 +31,24 @@ class Dataverse(object): 'dataverseContacts', 'dataverseType' ] - __attr_misc = [ + """Attributes valid for Dataverse class.""" + __attr_valid_class = [ # 'datasets', # 'dataverses', 'pid' - ] + ] + __attr_valid_metadata def __init__(self): - """Init `Dataverse()` class.""" + """Init a Dataverse() class. + + Examples + ------- + Create a Dataverse:: + + >>> from pyDataverse.models import Dataverse + >>> dv = Dataverse() + + """ """Misc""" self.datasets = [] self.dataverses = [] @@ -67,40 +67,43 @@ def __str__(self): return 'pyDataverse Dataverse() model class.' def set(self, data): - """Set attributes. - - Takes a dict with Key-Value pairs containing dataverse metadata. - Keys: attribute name. named after dataverse up standard. - Value: attribute value. types must be compatible for dataverse up. + """Set class attributes with a flat dict. + + Parameters + ---------- + data : dict + Flat dict with data. Key's must be name the same as the class + attribute, the data should be mapped to. 
+ + Examples + ------- + Set Dataverse attributes via flat dict:: + + >>> from pyDataverse.models import Dataverse + >>> dv = Dataverse() + >>> data = { + >>> 'dataverseContacts': [{'contactEmail': 'test@example.com'}], + >>> 'name': 'Test pyDataverse', + >>> 'alias': 'test-pyDataverse' + >>> } + >>> dv.set(data) + >>> dv.name + 'Test pyDataverse' """ for key, val in data.items(): - self.__setattr__(key, val) - - def is_valid(self): - """Check if metadata stored in attributes is valid for dataverse api upload. - - name, alias and dataverseContact are required fields. dataverseContact - is stored as list of emails in contactEmail, so contactEmail can not be - none. - """ - is_valid = True - for attr in self.__attr_required: - if not self.__getattribute__(attr): - is_valid = False - print('attribute \'{0}\' missing.'.format(attr)) - return is_valid + if key in self.__attr_valid_class: + self.__setattr__(key, val) + else: + # TODO: Raise Exception + print('Key {0} not valid.'.format(key)) def import_metadata(self, filename, format='dv_up'): - """Import data from different sources. - - It is allowed to import incomplete Dataverses, where required - attributes are missing. - - Simmply parse in the data. No validation needed. This will be done - later before the export. + """Import Dataverse metadata from file. - format: dv_up + This simply parses in data with valid attribute naming as keys. + Data must not be complete, and also attributes required for the + metadata json export can be missing. Example: Default dataverse metadata json: { @@ -119,15 +122,30 @@ def import_metadata(self, filename, format='dv_up'): "dataverseType": "LABORATORY" } - filename: string - format: `dv_up`, `dv_down` + Parameters + ---------- + filename : string + Filename with full path. + format : string + Data format of input. Available formats are: `dv_up` for Dataverse + Api upload compatible format. + + Examples + ------- + Import metadata coming from json file:: + + >>> from pyDataverse.models import Dataverse + >>> dv = Dataverse() + >>> dv.import_metadata('tests/data/dataverse_min.json') + >>> dv.name + 'Test pyDataverse' """ data = {} if format == 'dv_up': metadata = read_file_json(filename) # get first level metadata and parse it automatically - for attr in self.__attr_valid: + for attr in self.__attr_valid_metadata: if attr in metadata: data[attr] = metadata[attr] self.set(data) @@ -138,22 +156,85 @@ def import_metadata(self, filename, format='dv_up'): # TODO: Exception print('Data-format not right.') - def dict(self, format='dv_up'): - """Get Dataverse metadata as dict for Dataverse API upload. - - TODO: Validate standard + def is_valid(self): + """Check if set attributes are valid for Dataverse api metadata creation. + + The attributes required are listed in `__attr_required_metadata`. + + Returns + ------- + bool + True, if creation of metadata json is possible. False, if not. 
+ + Examples + ------- + Check if metadata is valid for Dataverse api upload:: + + >>> from pyDataverse.models import Dataverse + >>> dv = Dataverse() + >>> data = { + >>> 'dataverseContacts': [{'contactEmail': 'test@example.com'}], + >>> 'name': 'Test pyDataverse', + >>> 'alias': 'test-pyDataverse' + >>> } + >>> dv.set(data) + >>> dv.is_valid + True + >>> dv.name = None + >>> dv.is_valid + False - format: all, dv_up + """ + is_valid = True + for attr in self.__attr_required_metadata: + if not self.__getattribute__(attr): + is_valid = False + print('attribute \'{0}\' missing.'.format(attr)) + return is_valid - if data is valid will be checked here. so it is not necessary anymore - in json(). + def dict(self, format='dv_up'): + """Create dicts in different data formats. + + `dv_up`: Checks if data is valid for the different dict formats. + + Parameters + ---------- + format : string + Data format for dict creation. Available formats are: `dv_up` with + all metadata for Dataverse api upload, and `all` with all attributes + set. + + Returns + ------- + dict + Data as dict. + + Examples + ------- + Get dict of Dataverse metadata:: + + >>> from pyDataverse.models import Dataverse + >>> dv = Dataverse() + >>> data = { + >>> 'dataverseContacts': [{'contactEmail': 'test@example.com'}], + >>> 'name': 'Test pyDataverse', + >>> 'alias': 'test-pyDataverse' + >>> } + >>> dv.set(data) + >>> data = dv.dict() + >>> data['name'] + 'Test pyDataverse' + + Todo + ------- + Validate standards. """ data = {} if format == 'dv_up': if self.is_valid(): - for attr in self.__attr_valid: - if self.__getattribute__(attr): + for attr in self.__attr_valid_metadata: + if self.__getattribute__(attr) is not None: data[attr] = self.__getattribute__(attr) # TODO: prüfen, ob required attributes gesetzt sind = Exception return data @@ -161,8 +242,8 @@ def dict(self, format='dv_up'): print('dict can not be created. Data is not valid for format') return None elif format == 'all': - for attr in self.__attr_misc + self.__attr_valid: - if self.__getattribute__(attr): + for attr in self.__attr_valid_class: + if self.__getattribute__(attr) is not None: data[attr] = self.__getattribute__(attr) return data else: @@ -171,9 +252,7 @@ def dict(self, format='dv_up'): return None def json(self, format='dv_up'): - """Get Dataverse metadata as json for Dataverse API upload. - - TODO: Validate standard + r"""Create json from attributes. Example: Default dataverse metadata json: { @@ -192,6 +271,38 @@ def json(self, format='dv_up'): "dataverseType": "LABORATORY" } + Parameters + ---------- + format : string + Data format of input. Available formats are: `dv_up` for Dataverse + Api upload compatible format and `all` with all attributes named in + `__attr_valid_class`. + + Returns + ------- + string + json-formatted string of Dataverse metadata for api upload. + + Examples + ------- + Get dict of Dataverse metadata:: + + >>> from pyDataverse.models import Dataverse + >>> dv = Dataverse() + >>> data = { + >>> 'dataverseContacts': [{'contactEmail': 'test@example.com'}], + >>> 'name': 'Test pyDataverse', + >>> 'alias': 'test-pyDataverse' + >>> } + >>> dv.set(data) + >>> data = dv.json() + >>> data + '{\n "name": "Test pyDataverse",\n "dataverseContacts": [\n {\n "contactEmail": "test@example.com"\n }\n ],\n "alias": "test-pyDataverse"\n}' + + Todo + ------- + Validate standards. 
+ """ if format == 'dv_up': data = self.dict('dv_up') @@ -210,11 +321,29 @@ def json(self, format='dv_up'): print('data format not valid.') def export_metadata(self, filename, format='dv_up'): - """Export data to different file-formats. - - exports only to metadata standards. - - format: `dv_up` + """Export Dataverse metadata to Dataverse api upload json. + + Parameters + ---------- + filename : string + Filename with full path. + format : string + Data format for export. Available format is: `dv_up` with all + metadata for Dataverse api upload. + + Examples + ------- + Export Dataverse metadata:: + + >>> from pyDataverse.models import Dataverse + >>> dv = Dataverse() + >>> data = { + >>> 'dataverseContacts': [{'contactEmail': 'test@example.com'}], + >>> 'name': 'Test pyDataverse', + >>> 'alias': 'test-pyDataverse' + >>> } + >>> dv.set(data) + >>> dv.export_metadata('tests/data/dataverse_export.json') """ if format == 'dv_up': @@ -225,14 +354,10 @@ def export_metadata(self, filename, format='dv_up'): class Dataset(object): - """Base class for the Dataset model.""" - - __attr_misc = [ - 'datafiles' - ] + """Base class for the Dataset data model.""" """Attributes required for Dataset metadata json.""" - __attr_required = [ + __attr_required_metadata = [ 'title', 'author', 'datasetContact', @@ -240,17 +365,21 @@ class Dataset(object): 'subject' ] - """Attributes on first level of Dataverse metadata json inside - [\'datasetVersion\'].""" - __attr_datasetVersion = [ + """ + Dataset metadata attributes of Dataverse api upload inside + [\'datasetVersion\']. + """ + __attr_valid_metadata_datasetVersion = [ 'license', 'termsOfUse', 'termsOfAccess' ] - """Attributes on first level of Dataverse metadata json inside - [\'datasetVersion\'][\'metadataBlocks\'][\'citation\'].""" - __attr_citation = [ + """ + Dataset metadata attributes of Dataverse api upload inside + [\'datasetVersion\'][\'metadataBlocks\'][\'citation\']. + """ + __attr_valid_metadata_citation_dicts = [ 'title', 'subtitle', 'alternativeTitle', @@ -275,9 +404,11 @@ class Dataset(object): 'kindOfData' ] - """Attributes in Dataverse metadata json inside - [\'datasetVersion\'][\'metadataBlocks\'][\'citation\'][\'fields\'].""" - __attr_citation_fields = { + """ + Dataset metadata attributes of Dataverse api upload inside + [\'datasetVersion\'][\'metadataBlocks\'][\'citation\'][\'fields\']. + """ + __attr_valid_metadata_citation_arrays = { 'otherId': ['otherIdAgency', 'otherIdValue'], 'author': ['authorName', 'authorAffiliation', 'authorIdentifierScheme', 'authorIdentifier'], @@ -302,24 +433,30 @@ class Dataset(object): 'software': ['softwareName', 'softwareVersion'] } - """Attributes on first level of Dataverse metadata json inside - [\'datasetVersion\'][\'metadataBlocks\'][\'geospatial\'].""" - __attr_geospatial = [ + """ + Dataset metadata attributes of Dataverse api upload inside + [\'datasetVersion\'][\'metadataBlocks\'][\'geospatial\']. + """ + __attr_valid_metadata_geospatial_dicts = [ 'geographicUnit' ] - """Attributes in Dataverse metadata json inside - [\'datasetVersion\'][\'metadataBlocks\'][\'geospatial\'][\'fields\'].""" - __attr_geospatial_fields = { + """ + Dataset metadata attributes of Dataverse api upload inside + [\'datasetVersion\'][\'metadataBlocks\'][\'geospatial\'][\'fields\']. 
+ """ + __attr_valid_metadata_geospatial_arrays = { 'geographicCoverage': ['country', 'state', 'city', 'otherGeographicCoverage'], 'geographicBoundingBox': ['westLongitude', 'eastLongitude', 'northLongitude', 'southLongitude'] } - """Attributes on first level of Dataverse metadata json inside - [\'datasetVersion\'][\'metadataBlocks\'][\'socialscience\'].""" - __attr_socialscience = [ + """ + Dataset metadata attributes of Dataverse api upload inside + [\'datasetVersion\'][\'metadataBlocks\'][\'socialscience\']. + """ + __attr_valid_metadata_socialscience_dicts = [ 'unitOfAnalysis', 'universe', 'timeMethod', @@ -341,21 +478,46 @@ class Dataset(object): 'otherDataAppraisal', ] - """Attributes on first level of Dataverse metadata json inside - [\'datasetVersion\'][\'metadataBlocks\'][\'journal\'].""" - __attr_journal = [ + """ + Dataset metadata attributes of Dataverse api upload inside + [\'datasetVersion\'][\'metadataBlocks\'][\'journal\']. + """ + __attr_valid_metadata_journal_dicts = [ 'journalArticleType' ] - """Attributes in Dataverse metadata json inside - [\'datasetVersion\'][\'metadataBlocks\'][\'journal\'][\'fields\'].""" - __attr_journal_fields = { + """ + Dataset metadata attributes of Dataverse api upload inside + [\'datasetVersion\'][\'metadataBlocks\'][\'journal\'][\'fields\']. + """ + __attr_valid_metadata_journal_arrays = { 'journalVolumeIssue': ['journalVolume', 'journalIssue', 'journalPubDate'] } + """Attributes valid for Dataset class.""" + __attr_valid_class = [ + 'datafiles' + ] + __attr_valid_metadata_datasetVersion \ + + __attr_valid_metadata_citation_dicts \ + + list(__attr_valid_metadata_citation_arrays.keys()) \ + + __attr_valid_metadata_geospatial_dicts \ + + list(__attr_valid_metadata_geospatial_arrays.keys()) \ + + __attr_valid_metadata_socialscience_dicts \ + + __attr_valid_metadata_journal_dicts \ + + list(__attr_valid_metadata_journal_arrays.keys()) \ + def __init__(self): - """Init Dataset() class.""" + """Init a Dataset() class. + + Examples + ------- + Create a Dataverse:: + + >>> from pyDataverse.models import Dataset + >>> ds = Dataset() + + """ """Misc""" self.datafiles = [] @@ -445,94 +607,56 @@ def __str__(self): return 'pyDataverse Dataset() model class.' def set(self, data): - """Set attributes. - - Takes a dict with Key-Value pairs containing dataverse metadata. - Keys: attribute name. named after dataverse up standard. - Value: attribute value. types must be compatible for dataverse up. + """Set class attributes with a flat dict as input. + + Parameters + ---------- + data : dict + Flat dict with data. Key's must be name the same as the class + attribute, the data should be mapped to. + + Examples + ------- + Set Dataverse attributes via flat dict:: + + >>> from pyDataverse.models import Dataset + >>> ds = Dataset() + >>> data = { + >>> 'title': 'pyDataverse study 2019', + >>> 'dsDescription': 'New study about pyDataverse usage in 2019' + >>> } + >>> ds.set(data) + >>> ds.title + 'pyDataverse study 2019' """ for key, val in data.items(): - self.__setattr__(key, val) - - def is_valid(self): - """Check if metadata stored in attributes is valid for dataverse api upload. - - required: ?? - TODO: Test out required fields or ask Harvard. 
- - """ - is_valid = True - - # check if all required attributes are set - for attr in self.__attr_required: - if not self.__getattribute__(attr): - is_valid = False - print('attribute \'{0}\' missing.'.format(attr)) - - # check if attribute sets are complete where necessary - tp_cov = self.__getattribute__('timePeriodCovered') - if tp_cov: - for tp in tp_cov: - if 'timePeriodCoveredStart' in tp or 'timePeriodCoveredEnd' in tp: - if not ('timePeriodCoveredStart' in tp and 'timePeriodCoveredEnd' in tp): - is_valid = False - - d_coll = self.__getattribute__('dateOfCollection') - if d_coll: - for d in d_coll: - if 'dateOfCollectionStart' in d or 'dateOfCollectionEnd' in d: - if not ('dateOfCollectionStart' in d and 'dateOfCollectionEnd' in d): - is_valid = False - - authors = self.__getattribute__('author') - if authors: - for a in authors: - if 'authorAffiliation' in a or 'authorIdentifierScheme' in a or 'authorIdentifier' in a: - if 'authorName' not in a: - is_valid = False - - ds_contac = self.__getattribute__('datasetContact') - if ds_contac: - for c in ds_contac: - if 'datasetContactAffiliation' in c or 'datasetContactEmail' in c: - if 'datasetContactName' not in c: - is_valid = False - - producer = self.__getattribute__('producer') - if producer: - for p in producer: - if 'producerAffiliation' in p or 'producerAbbreviation' in p or 'producerURL' in p or 'producerLogoURL' in p: - if not p['producerName']: - is_valid = False - - contributor = self.__getattribute__('contributor') - if contributor: - for c in contributor: - if 'contributorType' in c: - if 'contributorName' not in c: - is_valid = False - - distributor = self.__getattribute__('distributor') - if distributor: - for d in distributor: - if 'distributorAffiliation' in d or 'distributorAbbreviation' in d or 'distributorURL' in d or 'distributorLogoURL' in d: - if 'distributorName' not in d: - is_valid = False - - bbox = self.__getattribute__('geographicBoundingBox') - if bbox: - for b in bbox: - if b: - if not ('westLongitude' in b and 'eastLongitude' in b and 'northLongitude' in b and 'southLongitude' in b): - is_valid = False - - return is_valid + if key in self.__attr_valid_class or key == 'citation_displayName' or key == 'geospatial_displayName' or key == 'socialscience_displayName' or key == 'journal_displayName' or key == 'targetSampleActualSize' or key == 'targetSampleSizeFormula' or key == 'socialScienceNotesType' or key == 'socialScienceNotesText' or key == 'socialScienceNotesSubject': + self.__setattr__(key, val) + else: + # TODO: Raise Exception + print('Key {0} not valid.'.format(key)) def import_metadata(self, filename, format='dv_up'): - """Import metadata. - - format: dv_up + """Import Dataset metadata from file. + + Parameters + ---------- + filename : string + Filename with full path. + format : string + Data format of input. Available formats are: `dv_up` for Dataverse + api upload compatible format. 
+ + Examples + ------- + Set Dataverse attributes via flat dict:: + + >>> from pyDataverse.models import Dataset + >>> ds = Dataset() + >>> ds.import_metadata('tests/data/dataset_full.json') + >>> ds.title + 'Replication Data for: Title' """ data = {} @@ -541,7 +665,7 @@ def import_metadata(self, filename, format='dv_up'): """dataset""" # get first level metadata and parse it automatically for key, val in metadata['datasetVersion'].items(): - if key in self.__attr_datasetVersion: + if key in self.__attr_valid_metadata_datasetVersion: data[key] = val # get nested metadata and parse it manually @@ -559,13 +683,13 @@ def import_metadata(self, filename, format='dv_up'): data['citation_displayName'] = citation['displayName'] for field in citation['fields']: - if field['typeName'] in self.__attr_citation: + if field['typeName'] in self.__attr_valid_metadata_citation_dicts: data[field['typeName']] = field['value'] - if field['typeName'] in self.__attr_citation_fields: + if field['typeName'] in self.__attr_valid_metadata_citation_arrays: data[field['typeName']] = self.__parse_dicts( field['value'], - self.__attr_citation_fields[field['typeName']]) + self.__attr_valid_metadata_citation_arrays[field['typeName']]) if field['typeName'] == 'series': if 'seriesName' in field['value']: @@ -584,13 +708,13 @@ def import_metadata(self, filename, format='dv_up'): geospatial['displayName']) for field in geospatial['fields']: - if field['typeName'] in self.__attr_geospatial: + if field['typeName'] in self.__attr_valid_metadata_geospatial_dicts: data[field['typeName']] = field['value'] - if field['typeName'] in self.__attr_geospatial_fields: + if field['typeName'] in self.__attr_valid_metadata_geospatial_arrays: data[field['typeName']] = self.__parse_dicts( field['value'], - self.__attr_geospatial_fields[field['typeName']]) + self.__attr_valid_metadata_geospatial_arrays[field['typeName']]) else: # TODO: Exception print('geospatial not in json') @@ -603,7 +727,7 @@ def import_metadata(self, filename, format='dv_up'): socialscience['displayName']) for field in socialscience['fields']: - if field['typeName'] in self.__attr_socialscience: + if field['typeName'] in self.__attr_valid_metadata_socialscience_dicts: data[field['typeName']] = field['value'] if field['typeName'] == 'targetSampleSize': @@ -631,13 +755,13 @@ def import_metadata(self, filename, format='dv_up'): journal['displayName']) for field in journal['fields']: - if field['typeName'] in self.__attr_journal: + if field['typeName'] in self.__attr_valid_metadata_journal_dicts: data[field['typeName']] = field['value'] - if field['typeName'] in self.__attr_journal_fields: + if field['typeName'] in self.__attr_valid_metadata_journal_arrays: data[field['typeName']] = self.__parse_dicts( field['value'], - self.__attr_journal_fields[field['typeName']]) + self.__attr_valid_metadata_journal_arrays[field['typeName']]) else: # TODO: Exception print('journal not in json') @@ -651,12 +775,19 @@ def import_metadata(self, filename, format='dv_up'): print('Data-format not right') def __parse_dicts(self, data, attr_list): - """Parse out list of dicts. + """Parse out Dataverse api metadata dicts. - data: list of dicts - attr_list: list of attributes to be parsed out. + Parameters + ---------- + data : list + List of Dataverse api metadata fields. + attr_list : list + List of attributes to be parsed. - return: list of dicts + Returns + ------- + list + List of dicts with parsed out key-value pairs. 
""" data_tmp = [] @@ -672,12 +803,141 @@ def __parse_dicts(self, data, attr_list): return data_tmp - def dict(self, format='dv_up'): - """Get Dataset metadata as dict for Dataverse API upload. + def is_valid(self): + """Checks if attributes available are valid for Dataverse api metadata + creation. + + The attributes required are listed in `__attr_required_metadata`. + + Returns + ------- + bool + True, if creation of metadata json is possible. False, if not. + + Examples + ------- + Check if metadata is valid for Dataverse api upload:: + + >>> from pyDataverse.models import Dataset + >>> ds = Dataset() + >>> data = { + >>> 'title': 'pyDataverse study 2019', + >>> 'dsDescription': 'New study about pyDataverse usage in 2019' + >>> } + >>> ds.set(data) + >>> ds.is_valid() + False + >>> ds.author = [{'authorName': 'LastAuthor1, FirstAuthor1'}] + >>> ds.datasetContact = [{'datasetContactName': 'LastContact1, FirstContact1'}] + >>> ds.subject = ['Engineering'] + >>> ds.is_valid() + True + + Todo + ------- + Test out required fields or ask Harvard. - TODO: Validate standard + """ + is_valid = True - format: dv_up, all + # check if all required attributes are set + for attr in self.__attr_required_metadata: + if not self.__getattribute__(attr): + is_valid = False + print('Metadata not valid: attribute \'{0}\' missing.'.format(attr)) + + # check if attribute sets are complete where necessary + tp_cov = self.__getattribute__('timePeriodCovered') + if tp_cov: + for tp in tp_cov: + if 'timePeriodCoveredStart' in tp or 'timePeriodCoveredEnd' in tp: + if not ('timePeriodCoveredStart' in tp and 'timePeriodCoveredEnd' in tp): + is_valid = False + + d_coll = self.__getattribute__('dateOfCollection') + if d_coll: + for d in d_coll: + if 'dateOfCollectionStart' in d or 'dateOfCollectionEnd' in d: + if not ('dateOfCollectionStart' in d and 'dateOfCollectionEnd' in d): + is_valid = False + + authors = self.__getattribute__('author') + if authors: + for a in authors: + if 'authorAffiliation' in a or 'authorIdentifierScheme' in a or 'authorIdentifier' in a: + if 'authorName' not in a: + is_valid = False + + ds_contac = self.__getattribute__('datasetContact') + if ds_contac: + for c in ds_contac: + if 'datasetContactAffiliation' in c or 'datasetContactEmail' in c: + if 'datasetContactName' not in c: + is_valid = False + + producer = self.__getattribute__('producer') + if producer: + for p in producer: + if 'producerAffiliation' in p or 'producerAbbreviation' in p or 'producerURL' in p or 'producerLogoURL' in p: + if not p['producerName']: + is_valid = False + + contributor = self.__getattribute__('contributor') + if contributor: + for c in contributor: + if 'contributorType' in c: + if 'contributorName' not in c: + is_valid = False + + distributor = self.__getattribute__('distributor') + if distributor: + for d in distributor: + if 'distributorAffiliation' in d or 'distributorAbbreviation' in d or 'distributorURL' in d or 'distributorLogoURL' in d: + if 'distributorName' not in d: + is_valid = False + + bbox = self.__getattribute__('geographicBoundingBox') + if bbox: + for b in bbox: + if b: + if not ('westLongitude' in b and 'eastLongitude' in b and 'northLongitude' in b and 'southLongitude' in b): + is_valid = False + + return is_valid + + def dict(self, format='dv_up'): + """Create dicts in different data formats. + + Parameters + ---------- + format : string + Data format for dict creation. Available formats are: `dv_up` with + all metadata for Dataverse api upload, and `all` with all attributes + set. 
+ + Returns + ------- + dict + Data as dict. + + Examples + ------- + Get dict of Dataverse metadata:: + + >>> from pyDataverse.models import Dataset + >>> ds = Dataset() + >>> data = { + >>> 'title': 'pyDataverse study 2019', + >>> 'dsDescription': 'New study about pyDataverse usage in 2019' + >>> } + >>> ds.set(data) + >>> data = dv.dict() + >>> data['title'] + 'pyDataverse study 2019' + + Todo + ------- + Validate standard """ if format == 'dv_up': @@ -696,39 +956,44 @@ def dict(self, format='dv_up'): """dataset""" # Generate first level attributes - for attr in self.__attr_datasetVersion: - data['datasetVersion'][attr] = self.__getattribute__(attr) + for attr in self.__attr_valid_metadata_datasetVersion: + if self.__getattribute__(attr) is not None: + data['datasetVersion'][attr] = self.__getattribute__(attr) """citation""" if self.citation_displayName: citation['displayName'] = self.citation_displayName # Generate first level attributes - for attr in self.__attr_citation: - citation['fields'].append({ - 'typeName': attr, - 'value': self.__getattribute__(attr) - }) + for attr in self.__attr_valid_metadata_citation_dicts: + if self.__getattribute__(attr) is not None: + citation['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) # Generate fields attributes - for key, val in self.__attr_citation_fields.items(): - citation['fields'].append({ - 'typeName': key, - 'value': self.__generate_dicts(key, val) - }) + for key, val in self.__attr_valid_metadata_citation_arrays.items(): + if self.__getattribute__(key) is not None: + citation['fields'].append({ + 'typeName': key, + 'value': self.__generate_dicts(key, val) + }) # Generate series attributes - if self.__getattribute__('series'): + if self.__getattribute__('series') is not None: tmp_dict = {} tmp_dict['value'] = {} if 'seriesName' in self.__getattribute__('series'): - tmp_dict['value']['seriesName'] = {} - tmp_dict['value']['seriesName']['typeName'] = 'seriesName' - tmp_dict['value']['seriesName']['value'] = self.__getattribute__('seriesName') + if self.__getattribute__('seriesName') is not None: + tmp_dict['value']['seriesName'] = {} + tmp_dict['value']['seriesName']['typeName'] = 'seriesName' + tmp_dict['value']['seriesName']['value'] = self.__getattribute__('seriesName') if 'seriesInformation' in self.__getattribute__('series'): - tmp_dict['value']['seriesInformation'] = {} - tmp_dict['value']['seriesInformation']['typeName'] = 'seriesInformation' - tmp_dict['value']['seriesInformation']['value'] = self.__getattribute__('seriesInformation') + if self.__getattribute__('seriesInformation') is not None: + tmp_dict['value']['seriesInformation'] = {} + tmp_dict['value']['seriesInformation']['typeName'] = 'seriesInformation' + tmp_dict['value']['seriesInformation']['value'] = self.__getattribute__('seriesInformation') citation['fields'].append({ 'typeName': 'series', 'value': tmp_dict @@ -736,61 +1001,69 @@ def dict(self, format='dv_up'): """geospatial""" # Generate first level attributes - for attr in self.__attr_geospatial: - geospatial['fields'].append({ - 'typeName': attr, - 'value': self.__getattribute__(attr) - }) + for attr in self.__attr_valid_metadata_geospatial_dicts: + if self.__getattribute__(attr) is not None: + geospatial['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) # Generate fields attributes - for key, val in self.__attr_geospatial_fields.items(): + for key, val in self.__attr_valid_metadata_geospatial_arrays.items(): # check if attribute exists - 
geospatial['fields'].append({ - 'typeName': key, - 'value': self.__generate_dicts(key, val) - }) + if self.__getattribute__(key) is not None: + geospatial['fields'].append({ + 'typeName': key, + 'value': self.__generate_dicts(key, val) + }) """socialscience""" # Generate first level attributes - for attr in self.__attr_socialscience: - socialscience['fields'].append({ - 'typeName': attr, - 'value': self.__getattribute__(attr) - }) + for attr in self.__attr_valid_metadata_socialscience_dicts: + if self.__getattribute__(attr) is not None: + socialscience['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) # Generate targetSampleSize attributes - if self.__getattribute__('targetSampleSize'): + if self.__getattribute__('targetSampleSize') is not None: tmp_dict = {} tmp_dict['value'] = {} if 'targetSampleActualSize' in self.__getattribute__('targetSampleSize'): - tmp_dict['value']['targetSampleActualSize'] = {} - tmp_dict['value']['targetSampleActualSize']['typeName'] = 'targetSampleActualSize' - tmp_dict['value']['targetSampleActualSize']['value'] = self.__getattribute__('targetSampleActualSize') + if self.__getattribute__('targetSampleActualSize') is not None: + tmp_dict['value']['targetSampleActualSize'] = {} + tmp_dict['value']['targetSampleActualSize']['typeName'] = 'targetSampleActualSize' + tmp_dict['value']['targetSampleActualSize']['value'] = self.__getattribute__('targetSampleActualSize') if 'targetSampleSizeFormula' in self.__getattribute__('targetSampleSize'): - tmp_dict['value']['targetSampleSizeFormula'] = {} - tmp_dict['value']['targetSampleSizeFormula']['typeName'] = 'targetSampleSizeFormula' - tmp_dict['value']['targetSampleSizeFormula']['value'] = self.__getattribute__('targetSampleSizeFormula') + if self.__getattribute__('targetSampleSizeFormula') is not None: + tmp_dict['value']['targetSampleSizeFormula'] = {} + tmp_dict['value']['targetSampleSizeFormula']['typeName'] = 'targetSampleSizeFormula' + tmp_dict['value']['targetSampleSizeFormula']['value'] = self.__getattribute__('targetSampleSizeFormula') socialscience['fields'].append({ 'typeName': 'series', 'value': tmp_dict }) # Generate socialScienceNotes attributes - if self.__getattribute__('socialScienceNotes'): + if self.__getattribute__('socialScienceNotes') is not None: tmp_dict = {} tmp_dict['value'] = {} if 'socialScienceNotesType' in self.__getattribute__('socialScienceNotes'): - tmp_dict['value']['socialScienceNotesType'] = {} - tmp_dict['value']['socialScienceNotesType']['typeName'] = 'socialScienceNotesType' - tmp_dict['value']['socialScienceNotesType']['value'] = self.__getattribute__('socialScienceNotesType') + if self.__getattribute__('socialScienceNotesType') is not None: + tmp_dict['value']['socialScienceNotesType'] = {} + tmp_dict['value']['socialScienceNotesType']['typeName'] = 'socialScienceNotesType' + tmp_dict['value']['socialScienceNotesType']['value'] = self.__getattribute__('socialScienceNotesType') if 'socialScienceNotesSubject' in self.__getattribute__('socialScienceNotes'): - tmp_dict['value']['socialScienceNotesSubject'] = {} - tmp_dict['value']['socialScienceNotesSubject']['typeName'] = 'socialScienceNotesSubject' - tmp_dict['value']['socialScienceNotesSubject']['value'] = self.__getattribute__('socialScienceNotesSubject') + if self.__getattribute__('socialScienceNotesSubject') is not None: + tmp_dict['value']['socialScienceNotesSubject'] = {} + tmp_dict['value']['socialScienceNotesSubject']['typeName'] = 'socialScienceNotesSubject' + 
tmp_dict['value']['socialScienceNotesSubject']['value'] = self.__getattribute__('socialScienceNotesSubject') if 'socialScienceNotesText' in self.__getattribute__('socialScienceNotes'): - tmp_dict['value']['socialScienceNotesText'] = {} - tmp_dict['value']['socialScienceNotesText']['typeName'] = 'socialScienceNotesText' - tmp_dict['value']['socialScienceNotesText']['value'] = self.__getattribute__('socialScienceNotesText') + if self.__getattribute__('socialScienceNotesText') is not None: + tmp_dict['value']['socialScienceNotesText'] = {} + tmp_dict['value']['socialScienceNotesText']['typeName'] = 'socialScienceNotesText' + tmp_dict['value']['socialScienceNotesText']['value'] = self.__getattribute__('socialScienceNotesText') socialscience['fields'].append({ 'typeName': 'series', 'value': tmp_dict @@ -798,18 +1071,20 @@ def dict(self, format='dv_up'): """journal""" # Generate first level attributes - for attr in self.__attr_journal: - journal['fields'].append({ - 'typeName': attr, - 'value': self.__getattribute__(attr) - }) + for attr in self.__attr_valid_metadata_journal_dicts: + if self.__getattribute__(attr) is not None: + journal['fields'].append({ + 'typeName': attr, + 'value': self.__getattribute__(attr) + }) # Generate fields attributes - for key, val in self.__attr_journal_fields.items(): - journal['fields'].append({ - 'typeName': key, - 'value': self.__generate_dicts(key, val) - }) + for key, val in self.__attr_valid_metadata_journal_arrays.items(): + if self.__getattribute__(key) is not None: + journal['fields'].append({ + 'typeName': key, + 'value': self.__generate_dicts(key, val) + }) # TODO: prüfen, ob required attributes gesetzt sind. wenn nicht = Exception! data['datasetVersion']['metadataBlocks']['citation'] = citation @@ -822,13 +1097,8 @@ def dict(self, format='dv_up'): print('dict can not be created. Data is not valid for format') return None elif format == 'all': - attr_lst = self.__attr_datasetVersion + self.__attr_citation + self.__attr_geospatial + self.__attr_socialscience + self.__attr_journal - for key, val in self.__attr_citation_fields.update( - self.__attr_geospatial_fields.update( - self.__attr_journal_fields)): - attr_lst.append(key) - for attr in attr_lst: - if self.__getattribute__(attr): + for attr in self.__attr_valid_class: + if self.__getattribute__(attr) is not None: data[attr] = self.__getattribute__(attr) return data @@ -837,7 +1107,21 @@ def dict(self, format='dv_up'): return None def __generate_dicts(self, key, val): - """Parse out list of dicts of metadata attributes for dict export.""" + """Generate dicts for array attributes of Dataverse api metadata upload. + + Parameters + ---------- + key : string + Name of attribute + val : string + Value of attribute. + + Returns + ------- + list + List of filled dicts of metadata for Dataverse api upload. + + """ # check if attribute exists tmp_list = [] if self.__getattribute__(key): @@ -856,8 +1140,38 @@ def __generate_dicts(self, key, val): return tmp_list def json(self, format='dv_up'): - """Get Dataset metadata as json for Dataverse API upload. - + """Create Dataset json from attributes. + + Parameters + ---------- + format : string + Data format of input. Available formats are: `dv_up` for Dataverse + Api upload compatible format and `all` with all attributes named in + `__attr_valid_class`. + + Returns + ------- + string + json-formatted string of Dataverse metadata for api upload. 
+ + Examples + ------- + Get json of Dataverse api upload:: + + >>> from pyDataverse.models import Dataset + >>> ds = Dataset() + >>> data = { + >>> 'title': 'pyDataverse study 2019', + >>> 'dsDescription': 'New study about pyDataverse usage in 2019' + >>> 'author': [{'authorName': 'LastAuthor1, FirstAuthor1'}], + >>> 'datasetContact': [{'datasetContactName': 'LastContact1, FirstContact1'}], + >>> 'subject': ['Engineering'], + >>> } + >>> ds.set(data) + >>> data = ds.json() + + Todo + ------- TODO: Validate standard TODO: Link to default json file @@ -871,9 +1185,30 @@ def json(self, format='dv_up'): print('data format not valid.') def export_metadata(self, filename, format='dv_up'): - """Export data to different file-formats. - - format: `dv_up` + """Export Dataset metadata to Dataverse api upload json. + + Parameters + ---------- + filename : string + Filename with full path. + format : string + Data format for export. Available format is: `dv_up` with all + metadata for Dataverse api upload. + + Examples + ------- + Export metadata to json file:: + + >>> from pyDataverse.models import Dataset + >>> ds = Dataset() + >>> data = { + >>> 'title': 'pyDataverse study 2019', + >>> 'dsDescription': 'New study about pyDataverse usage in 2019' + >>> 'author': [{'authorName': 'LastAuthor1, FirstAuthor1'}], + >>> 'datasetContact': [{'datasetContactName': 'LastContact1, FirstContact1'}], + >>> 'subject': ['Engineering'], + >>> } + >>> ds.export_metadata('tests/data/export_dataset.json') """ if format == 'dv_up': @@ -887,34 +1222,48 @@ class Datafile(object): """Base class for the Datafile model.""" """Attributes required for Datafile metadata json.""" - __attr_required = [ + __attr_required_metadata = [ 'filename', 'pid' ] """Attributes on first level of Datafile metadata json.""" - __attr_valid = [ + __attr_valid_metadata = [ 'description', - 'categories', - 'directoryLabel', + 'pid', 'restrict' ] """Attributes on first level of Datafile metadata json.""" - __attr_misc = [ - 'pid', + __attr_valid_class = [ 'filename' - ] + ] + __attr_valid_metadata def __init__(self, filename=None, pid=None): - """Init `Datafile()` class.""" + """Init a Datafile() class. + + Parameters + ---------- + filename : string + Filename with full path. + pid : string + Persistend identifier, e.g. DOI. + + Examples + ------- + Create a Datafile:: + + >>> from pyDataverse.models import Datafile + >>> df = Datafile() + >>> df + + + """ """Misc""" self.pid = pid self.filename = filename """Metadata""" self.description = None - self.categories = [] - self.directoryLabel = None self.restrict = None def __str__(self): @@ -922,73 +1271,123 @@ def __str__(self): return 'pyDataverse Datafile() model class.' def set(self, data): - """Set attributes. - - Takes a dict with Key-Value pairs containing Datafile metadata. - Keys: attribute name. named after dataverse up standard. - Value: attribute value. types must be compatible for dataverse up. + """Set class attributes with a flat dict. + + Parameters + ---------- + data : dict + Flat dict with data. Key's must be name the same as the class + attribute, the data should be mapped to. 
+ + Examples + ------- + Set Datafile attributes via flat dict:: + + >>> from pyDataverse.models import Datafile + >>> df = Datafile() + >>> data = { + >>> 'pid': 'doi:10.11587/EVMUHP', + >>> 'description': 'Test file', + >>> 'filename': 'tests/data/datafile.txt' + >>> } + >>> df.set(data) + >>> df.pid + 'doi:10.11587/EVMUHP', """ for key, val in data.items(): - self.__setattr__(key, val) + if key in self.__attr_valid_class: + self.__setattr__(key, val) + else: + # TODO: Raise Exception + print('Key {0} not valid.'.format(key)) def is_valid(self): - """Check if metadata stored in attributes is valid for dataverse api upload. - - more + """Check if set attributes are valid for Dataverse api metadata creation. + + Returns + ------- + bool + True, if creation of metadata json is possible. False, if not. + + Examples + ------- + Check if metadata is valid for Dataverse api upload:: + + >>> from pyDataverse.models import Datafile + >>> df = Datafile() + >>> data = { + >>> 'pid': 'doi:10.11587/EVMUHP', + >>> 'description': 'Test file', + >>> 'filename': 'tests/data/datafile.txt' + >>> } + >>> df.set(data) + >>> df.is_valid + True + >>> df.filename = None + >>> df.is_valid + False """ is_valid = True - for attr in self.__attr_required: - if not self.__getattribute__(attr): + for attr in self.__attr_required_metadata: + if self.__getattribute__(attr) is None: is_valid = False print('attribute \'{0}\' missing.'.format(attr)) return is_valid - def import_metadata(self, filename, format='dv_up'): - """Import metadata. - - format: dv_up - - """ - data = {} - if format == 'dv_up': - metadata = read_file_json(filename) - - for attr in self.__attr_valid: - if attr in metadata: - data[attr] = metadata[attr] - - self.set(data) - elif format == 'dv_down': - metadata = read_file_json(filename) - self.set(metadata) - else: - # TODO: Exception - print('Data-format not right') - def dict(self, format='dv_up'): - """Get Dataset metadata as dict for Dataverse API upload. - - TODO: Validate standard + """Create dict in different data formats. + + Parameters + ---------- + format : string + Data format for dict creation. Available formats are: `dv_up` with + all metadata for Dataverse api upload, and `all` with all attributes + set. + + Returns + ------- + dict + Data as dict. + + Examples + ------- + Check if metadata is valid for Dataverse api upload:: + + >>> from pyDataverse.models import Datafile + >>> df = Datafile() + >>> data = { + >>> 'pid': 'doi:10.11587/EVMUHP', + >>> 'description': 'Test file', + >>> 'filename': 'tests/data/datafile.txt' + >>> } + >>> df.set(data) + >>> data = df.dict() + >>> data['description'] + 'Test file' + + Todo + ------- + Validate standards. """ + data = {} if format == 'dv_up': if self.is_valid(): - data = {} - - for attr in self.__attr_valid: - data[attr] = self.__getattribute__(attr) + for attr in self.__attr_valid_metadata: + if self.__getattribute__(attr) is not None: + data[attr] = self.__getattribute__(attr) return data else: print('dict can not be created. Data is not valid') return None elif format == 'all': - for attr in self.__attr_misc + self.__attr_valid: - if self.__getattribute__(attr): + for attr in self.__attr_valid_class: + if self.__getattribute__(attr) is not None: data[attr] = self.__getattribute__(attr) return data else: @@ -997,28 +1396,58 @@ def dict(self, format='dv_up'): return None def json(self, format='dv_up'): - """Get Datafile metadata as json for Dataverse API upload. 
- - TODO: Validate standard - TODO: Link to default json file + r"""Create json from attributes. + + Parameters + ---------- + format : string + Data format of input. Available formats are: `dv_up` for Dataverse + Api upload compatible format and `all` with all attributes named in + `__attr_valid_class`. + + Returns + ------- + string + json-formatted string of Dataverse metadata for api upload. + + Examples + ------- + Get dict of Dataverse metadata:: + + >>> from pyDataverse.models import Datafile + >>> df = Datafile() + >>> data = { + >>> 'pid': 'doi:10.11587/EVMUHP', + >>> 'description': 'Test file', + >>> 'filename': 'tests/data/datafile.txt' + >>> } + >>> df.set(data) + >>> df.dict() + {'description': 'Test file', + 'directoryLabel': None, + 'restrict': None} + + Todo + ------- + Validate standards. + Link to default json file """ if format == 'dv_up': - return dict_to_json(self.dict()) + data = self.dict('dv_up') + if data: + return dict_to_json(data) + else: + print('Dict can not be created') + return None elif format == 'all': - return dict_to_json(self.dict('all')) + data = self.dict('all') + if data: + return dict_to_json(data) + else: + print('Dict can not be created') + return None else: # TODO Exception print('data format not valid.') - - def export_metadata(self, filename, format='dv_up'): - """Export data to different file-formats. - - format: `dv_up` - - """ - if format == 'dv_up': - return write_file_json(filename, self.dict) - else: - # TODO: Exception - print('Data-format not right.') + return None From d7b03cc868b4f965bd3c0cc2afa0138aa9d6e001 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 10 Jun 2019 21:31:30 +0200 Subject: [PATCH 30/46] add docstrings to conftest.py; update fixture names in conftest.py --- tests/conftest.py | 142 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 134 insertions(+), 8 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index d462efe..4846bae 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,6 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +"""Find out more at https://github.com/AUSSDA/pyDataverse.""" import json import os from pyDataverse.api import Api @@ -8,40 +11,141 @@ @pytest.fixture(scope='module') def api_connection(): + """Fixture, so set up an Api connection. + + Returns + ------- + Api + Api object. + + """ api_token = os.environ['API_TOKEN'] base_url = os.environ['BASE_URL'] return Api(base_url, api_token) def read_json(filename): + """Read in json file. + + Parameters + ---------- + filename : string + Filename with full path. + + Returns + ------- + dict + File content as dict. + + """ return j2d(read_file(filename)) def read_file(filename): + """Read in file. + + Parameters + ---------- + filename : string + Filename with full path. + + Returns + ------- + string + File content as string. + + """ with open(filename, 'r') as f: data = f.read() return data def write_file(filename, data): + """Write data to file. + + Parameters + ---------- + filename : string + Filename with full path. + data : string + File content as string. + + """ with open(filename, 'w') as f: f.write(data) def write_json(filename, data): + """Write data to json file. + + Parameters + ---------- + filename : string + Filename with full path. + data : dict + File content as dict. + + """ write_file(filename, d2j(data)) def j2d(data): + """Convert json to dict. + + Parameters + ---------- + data : string + JSON-formatted string. + + Returns + ------- + dict + Data as dict. 
+ + """ return json.loads(data) def d2j(data): + """Coinvert dict 2 json. + + Parameters + ---------- + data : dict + Data as dict. + + Returns + ------- + string + JSON-formatted string. + + """ return json.dumps(data, ensure_ascii=False, indent=2) @pytest.fixture -def import_dict(): +def import_dataverse_min_dict(): + """Import minimum Dataverse dict. + + Returns + ------- + dict + Minimum Dataverse metadata. + + """ + return read_json(TEST_DIR + '/data/dataverse_min.json') + + +@pytest.fixture +def import_dataset_min_dict(): + """Import dataset dict. + + Returns + ------- + dict + Dataset metadata. + + """ data = { 'license': 'CC0', 'termsOfUse': 'CC0 Waiver', @@ -51,16 +155,38 @@ def import_dict(): } return data + @pytest.fixture -def import_dataset_full(): - return read_json(TEST_DIR + '/data/dataset_full.json') +def import_datafile_min_dict(): + """Import minimum Datafile dict. + Returns + ------- + dict + Minimum Datafile metadata. -@pytest.fixture -def import_dataset_min(): - return read_json(TEST_DIR + '/data/dataset_min.json') + """ + data = { + 'pid': 'doi:10.11587/EVMUHP', + 'filename': 'tests/data/datafile.txt' + } + return data @pytest.fixture -def import_dataverse_min(): - return read_json(TEST_DIR + '/data/dataverse_min.json') +def import_datafile_full_dict(): + """Import full Datafile dict. + + Returns + ------- + dict + Full Datafile metadata. + + """ + data = { + 'pid': 'doi:10.11587/EVMUHP', + 'filename': 'tests/data/datafile.txt', + 'description': 'Test datafile', + 'restrict': False + } + return data From 24d6b100b2f94a395ab674ec4f0b116ad26709ad Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 10 Jun 2019 21:33:05 +0200 Subject: [PATCH 31/46] update fixtures and fix issues appearing through testing in test_api.py --- tests/test_api.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 8b14bbc..73f08e4 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -21,6 +21,7 @@ def test_api_connect(self): """Test successfull connection without api_token.""" api = Api(os.environ['BASE_URL']) sleep(SLEEP_TIME) + assert isinstance(api, Api) assert not api.api_token assert api.api_version == 'v1' @@ -37,6 +38,7 @@ def test_api_connect_base_url_wrong(self): base_url = 'http://wikipedia.org' api = Api(base_url) sleep(SLEEP_TIME) + assert not api.api_token assert api.api_version == 'v1' assert api.base_url == 'http://wikipedia.org' @@ -48,6 +50,7 @@ def test_api_connect_base_url_wrong(self): base_url = None api = Api(base_url) sleep(SLEEP_TIME) + assert not api.api_token assert api.api_version == 'v1' assert not api.base_url @@ -64,26 +67,29 @@ class TestApiRequests(object): def setup_class(cls): """Create the api connection for later use.""" cls.dataverse_id = 'test-pyDataverse' + cls.dataset_id = None - def test_create_dataverse(self, import_dataverse_min, api_connection): + def test_create_dataverse(self, import_dataverse_min_dict, api_connection): """Test successfull `.create_dataverse()` request`.""" if not os.environ.get('TRAVIS'): api = api_connection - metadata = import_dataverse_min + metadata = import_dataverse_min_dict resp = api.create_dataverse( self.dataverse_id, dict_to_json(metadata)) sleep(SLEEP_TIME) + assert isinstance(resp, Response) assert api.get_dataverse(self.dataverse_id).json() - def test_create_dataset(self, import_dataset_min, api_connection): + def test_create_dataset(self, import_dataset_min_dict, api_connection): """Test successfull `.create_dataset()` 
request`.""" if not os.environ.get('TRAVIS'): api = api_connection - metadata = import_dataset_min + metadata = import_dataset_min_dict resp = api.create_dataset(':root', dict_to_json(metadata)) sleep(SLEEP_TIME) TestApiRequests.dataset_id = resp.json()['data']['persistentId'] + assert isinstance(resp, Response) def test_get_dataset(self, api_connection): @@ -92,6 +98,7 @@ def test_get_dataset(self, api_connection): api = api_connection resp = api.get_dataset(TestApiRequests.dataset_id) sleep(SLEEP_TIME) + assert isinstance(resp, Response) def test_delete_dataset(self, api_connection): @@ -108,6 +115,7 @@ def test_delete_dataverse(self, api_connection): api = api_connection resp = api.delete_dataverse(self.dataverse_id) sleep(SLEEP_TIME) + assert isinstance(resp, Response) def test_get_request(self, api_connection): @@ -117,6 +125,7 @@ def test_get_request(self, api_connection): query_str = '/info/server' resp = api.get_request(query_str) sleep(SLEEP_TIME) + assert api.status == 'OK' assert isinstance(resp, Response) @@ -125,4 +134,5 @@ def test_get_dataverse(self, api_connection): api = api_connection resp = api.get_dataverse(':root') sleep(SLEEP_TIME) + assert isinstance(resp, Response) From 1cc98a6cfbe6f82111f25cea39438ae625ff8c4d Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 10 Jun 2019 21:34:29 +0200 Subject: [PATCH 32/46] add docstrings and fix issues appearing through testing in test_models_dataverse.py --- tests/test_models_dataverse.py | 222 ++++++++++++++++++++++++--------- 1 file changed, 163 insertions(+), 59 deletions(-) diff --git a/tests/test_models_dataverse.py b/tests/test_models_dataverse.py index 919b911..ae3e90f 100644 --- a/tests/test_models_dataverse.py +++ b/tests/test_models_dataverse.py @@ -1,4 +1,6 @@ -# coding: utf-8 +# !/usr/bin/env python +# -*- coding: utf-8 -*- +"""Dataverse data model tests.""" import os from pyDataverse.models import Dataset from pyDataverse.models import Dataverse @@ -7,9 +9,10 @@ class TestDataverse(object): - """Test the Api() class initalization.""" + """Tests for Dataverse().""" def test_dataverse_init(self): + """Test Dataverse.__init__().""" dv = Dataverse() assert isinstance(dv.datasets, list) @@ -25,8 +28,17 @@ def test_dataverse_init(self): assert not dv.description assert not dv.dataverseType - def test_dataverse_set_dv_up(self, import_dataverse_min): - data = import_dataverse_min + def test_dataverse_set_dv_up(self, import_dataverse_min_dict): + """Test Dataverse.set() with format=`dv_up`. + + Parameters + ---------- + import_dataverse_min_dict : dict + Fixture, which returns a flat dataset dict() coming from + `tests/data/dataverse_min.json`. 
+ + """ + data = import_dataverse_min_dict dv = Dataverse() dv.set(data) @@ -40,15 +52,70 @@ def test_dataverse_set_dv_up(self, import_dataverse_min): assert len(dv.dataverseContacts) == 1 assert dv.dataverseContacts[0]['contactEmail'] == 'info@aussda.at' - def test_dataverse_is_valid_valid(self, import_dataverse_min): - data = import_dataverse_min + def test_dataverse_import_metadata_dv_up(self): + """Test Dataverse.import_metadata() with format=`dv_up`.""" + dv = Dataverse() + dv.import_metadata(TEST_DIR + '/data/dataverse_min.json') + + assert isinstance(dv.datasets, list) + assert not dv.datasets + assert isinstance(dv.dataverses, list) + assert not dv.dataverses + assert not dv.pid + assert dv.alias == 'test-pyDataverse' + assert dv.name == 'Test pyDataverse' + assert isinstance(dv.dataverseContacts, list) + assert len(dv.dataverseContacts) == 1 + assert dv.dataverseContacts[0]['contactEmail'] == 'info@aussda.at' + + def test_dataverse_import_metadata_format_wrong(self): + """Test Dataverse.import_metadata() with non-valid format.""" + dv = Dataverse() + dv.import_metadata(TEST_DIR + '/data/dataverse_min.json', 'wrong') + + assert isinstance(dv.datasets, list) + assert len(dv.datasets) == 0 + assert not dv.datasets + assert isinstance(dv.dataverses, list) + assert len(dv.dataverses) == 0 + assert not dv.dataverses + assert not dv.pid + assert not dv.name + assert not dv.alias + assert isinstance(dv.dataverseContacts, list) + assert len(dv.dataverseContacts) == 0 + assert not dv.dataverseContacts + assert not dv.affiliation + assert not dv.description + assert not dv.dataverseType + + def test_dataverse_is_valid_valid(self, import_dataverse_min_dict): + """Test Dataverse.is_valid() with valid data. + + Parameters + ---------- + import_dataverse_min_dict : dict + Fixture, which returns a flat dataset dict() coming from + `tests/data/dataverse_min.json`. + + """ + data = import_dataverse_min_dict dv = Dataverse() dv.set(data) assert dv.is_valid() - def test_dataverse_is_valid_not(self, import_dataverse_min): - data = import_dataverse_min + def test_dataverse_is_valid_not(self, import_dataverse_min_dict): + """Test Dataverse.is_valid() with non-valid data. + + Parameters + ---------- + import_dataverse_min_dict : dict + Fixture, which returns a flat dataset dict() coming from + `tests/data/dataverse_min.json`. + + """ + data = import_dataverse_min_dict dv = Dataverse() dv.set(data) dv.name = None @@ -56,16 +123,34 @@ def test_dataverse_is_valid_not(self, import_dataverse_min): assert not dv.name assert not dv.is_valid() - def test_dataverse_dict_dv_up_valid(self, import_dataverse_min): - data = import_dataverse_min + def test_dataverse_dict_dv_up_valid(self, import_dataverse_min_dict): + """Test Dataverse.dict() with format=`dv_up` and valid data. + + Parameters + ---------- + import_dataverse_min_dict : dict + Fixture, which returns a flat dataset dict() coming from + `tests/data/dataverse_min.json`. + + """ + data = import_dataverse_min_dict dv = Dataverse() dv.set(data) assert dv.dict() assert isinstance(dv.dict(), dict) - def test_dataverse_dict_valid_all(self, import_dataverse_min): - data = import_dataverse_min + def test_dataverse_dict_all_valid(self, import_dataverse_min_dict): + """Test Dataverse.dict() with format=`all` and valid data. + + Parameters + ---------- + import_dataverse_min_dict : dict + Fixture, which returns a flat dataset dict() coming from + `tests/data/dataverse_min.json`. 
+ + """ + data = import_dataverse_min_dict dv = Dataverse() dv.set(data) dv.datasets = [Dataset()] @@ -80,39 +165,84 @@ def test_dataverse_dict_valid_all(self, import_dataverse_min): assert data['dataverseContacts'][0]['contactEmail'] == 'info@aussda.at' assert data['pid'] == 'doi:10.11587/EVMUHP' - def test_dataverse_dict_format_wrong(self, import_dataverse_min): - data = import_dataverse_min + def test_dataverse_dict_format_wrong(self, import_dataverse_min_dict): + """Test Dataverse.dict() with non-valid format. + + Parameters + ---------- + import_dataverse_min_dict : dict + Fixture, which returns a flat dataset dict() coming from + `tests/data/dataverse_min.json`. + + """ + data = import_dataverse_min_dict dv = Dataverse() dv.set(data) assert not dv.dict('wrong') - def test_dataverse_dict_dv_up_valid_not(self, import_dataverse_min): - data = import_dataverse_min + def test_dataverse_dict_dv_up_valid_not(self, import_dataverse_min_dict): + """Test Dataverse.dict() with format=`dv_up` and non-valid data. + + Parameters + ---------- + import_dataverse_min_dict : dict + Fixture, which returns a flat dataset dict() coming from + `tests/data/dataverse_min.json`. + + """ + data = import_dataverse_min_dict dv = Dataverse() dv.set(data) dv.name = None assert not dv.dict() - def test_dataverse_json_dv_up_valid(self, import_dataverse_min): - data = import_dataverse_min + def test_dataverse_json_dv_up_valid(self, import_dataverse_min_dict): + """Test Dataverse.json() with format=`dv_up` and valid data. + + Parameters + ---------- + import_dataverse_min_dict : dict + Fixture, which returns a flat dataset dict() coming from + `tests/data/dataverse_min.json`. + + """ + data = import_dataverse_min_dict dv = Dataverse() dv.set(data) assert dv.json() assert isinstance(dv.json(), str) - def test_dataverse_json_dv_up_valid_not(self, import_dataverse_min): - data = import_dataverse_min + def test_dataverse_json_dv_up_valid_not(self, import_dataverse_min_dict): + """Test Dataverse.json() with format=`dv_up` and non-valid data. + + Parameters + ---------- + import_dataverse_min_dict : dict + Fixture, which returns a flat dataset dict() coming from + `tests/data/dataverse_min.json`. + + """ + data = import_dataverse_min_dict dv = Dataverse() dv.set(data) dv.name = None assert not dv.json() - def test_dataverse_json_valid_all(self, import_dataverse_min): - data = import_dataverse_min + def test_dataverse_json_all_valid(self, import_dataverse_min_dict): + """Test Dataverse.json() with format=`all` and valid data. + + Parameters + ---------- + import_dataverse_min_dict : dict + Fixture, which returns a flat dataset dict() coming from + `tests/data/dataverse_min.json`. + + """ + data = import_dataverse_min_dict dv = Dataverse() dv.set(data) dv.datasets = [Dataset()] @@ -123,8 +253,17 @@ def test_dataverse_json_valid_all(self, import_dataverse_min): assert data assert isinstance(data, str) - def test_dataverse_json_valid_format_wrong(self, import_dataverse_min): - data = import_dataverse_min + def test_dataverse_json_format_wrong_valid(self, import_dataverse_min_dict): + """Test Dataverse.json() with non-valid format and valid data. + + Parameters + ---------- + import_dataverse_min_dict : dict + Fixture, which returns a flat dataset dict() coming from + `tests/data/dataverse_min.json`. 
+ + """ + data = import_dataverse_min_dict dv = Dataverse() dv.set(data) dv.datasets = [Dataset()] @@ -133,38 +272,3 @@ def test_dataverse_json_valid_format_wrong(self, import_dataverse_min): data = dv.json('wrong') assert not data - - def test_dataverse_import_metadata_dv_up(self): - dv = Dataverse() - dv.import_metadata(TEST_DIR + '/data/dataverse_min.json') - - assert isinstance(dv.datasets, list) - assert not dv.datasets - assert isinstance(dv.dataverses, list) - assert not dv.dataverses - assert not dv.pid - assert dv.alias == 'test-pyDataverse' - assert dv.name == 'Test pyDataverse' - assert isinstance(dv.dataverseContacts, list) - assert len(dv.dataverseContacts) == 1 - assert dv.dataverseContacts[0]['contactEmail'] == 'info@aussda.at' - - def test_dataverse_import_metadata_format_wrong(self): - dv = Dataverse() - dv.import_metadata(TEST_DIR + '/data/dataverse_min.json', 'wrong_data-format') - - assert isinstance(dv.datasets, list) - assert len(dv.datasets) == 0 - assert not dv.datasets - assert isinstance(dv.dataverses, list) - assert len(dv.dataverses) == 0 - assert not dv.dataverses - assert not dv.pid - assert not dv.name - assert not dv.alias - assert isinstance(dv.dataverseContacts, list) - assert len(dv.dataverseContacts) == 0 - assert not dv.dataverseContacts - assert not dv.affiliation - assert not dv.description - assert not dv.dataverseType From d51d52f0b228ba8626626bf6322ca427c4ac21bf Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 10 Jun 2019 21:35:13 +0200 Subject: [PATCH 33/46] add docstrings and fix issues appearing through testing --- tests/test_models_dataset.py | 54 +++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/tests/test_models_dataset.py b/tests/test_models_dataset.py index 582b299..6227450 100644 --- a/tests/test_models_dataset.py +++ b/tests/test_models_dataset.py @@ -1,15 +1,17 @@ -# coding: utf-8 +# !/usr/bin/env python +# -*- coding: utf-8 -*- +"""Dataset data model tests.""" import os from pyDataverse.models import Dataset -from pyDataverse.models import Dataverse TEST_DIR = os.path.dirname(os.path.realpath(__file__)) class TestDataset(object): - """Test the Dataset() class initalization.""" + """Tests for Dataset().""" def test_dataset_init(self): + """Test Dataset.__init__().""" ds = Dataset() assert isinstance(ds.datafiles, list) @@ -121,13 +123,37 @@ def test_dataset_init(self): assert len(ds.journalVolumeIssue) == 0 assert not ds.journalArticleType - def test_dataset_is_valid_valid(self, import_dataset_full): + def test_dataset_set_dv_up(self, import_dataset_min_dict): + """Test Dataset.set() with format=`dv_up`. + + Parameters + ---------- + import_dataset_min_dict : dict + Fixture, which returns a flat dataset dict(). 
+ + """ + ds = Dataset() + data = import_dataset_min_dict + ds.set(data) + + """dataset""" + assert ds.license == 'CC0' + assert ds.termsOfUse == 'CC0 Waiver' + assert ds.termsOfAccess == 'Terms of Access' + + """citation""" + assert ds.citation_displayName == 'Citation Metadata' + assert ds.title == 'Replication Data for: Title' + + def test_dataset_is_valid_valid(self): + """Test Dataset.is_valid() with valid data.""" ds = Dataset() ds.import_metadata(TEST_DIR + '/data/dataset_full.json') assert ds.is_valid() - def test_dataset_is_valid_valid_not(self, import_dataset_full): + def test_dataset_is_valid_valid_not(self): + """Test Dataset.is_valid() with non-valid data.""" ds = Dataset() ds.import_metadata(TEST_DIR + '/data/dataset_full.json') ds.title = None @@ -135,6 +161,7 @@ def test_dataset_is_valid_valid_not(self, import_dataset_full): assert not ds.is_valid() def test_dataset_import_metadata_dv_up(self): + """Test Dataset.import_metadata() with format=`dv_up`.""" ds = Dataset() ds.import_metadata(TEST_DIR + '/data/dataset_full.json') @@ -305,23 +332,10 @@ def test_dataset_import_metadata_dv_up(self): assert d['journalPubDate'] in ['1008-01-01'] assert ds.journalArticleType == 'abstract' - def test_dataset_set_dv_up(self, import_dict): - ds = Dataset() - data = import_dict - ds.set(data) - - """dataset""" - assert ds.license == 'CC0' - assert ds.termsOfUse == 'CC0 Waiver' - assert ds.termsOfAccess == 'Terms of Access' - - """citation""" - assert ds.citation_displayName == 'Citation Metadata' - assert ds.title == 'Replication Data for: Title' - def test_dataset_import_metadata_format_wrong(self): + """Test Dataset.import_metadata() with non-valid format.""" ds = Dataset() - ds.import_metadata(TEST_DIR + '/data/dataset_full.json', 'wrong_data-format') + ds.import_metadata(TEST_DIR + '/data/dataset_full.json', 'wrong') assert isinstance(ds.datafiles, list) assert len(ds.datafiles) == 0 From f476c8084a322d5d31eeee9f12c36c80db487fe8 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 10 Jun 2019 21:35:36 +0200 Subject: [PATCH 34/46] add test_models_datafiles.py --- tests/test_models_datafile.py | 228 ++++++++++++++++++++++++++++++++++ 1 file changed, 228 insertions(+) create mode 100644 tests/test_models_datafile.py diff --git a/tests/test_models_datafile.py b/tests/test_models_datafile.py new file mode 100644 index 0000000..cbe4f3e --- /dev/null +++ b/tests/test_models_datafile.py @@ -0,0 +1,228 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +"""Datafile data model tests.""" +import os +from pyDataverse.models import Datafile + +TEST_DIR = os.path.dirname(os.path.realpath(__file__)) + + +class TestDatafile(object): + """Tests for Datafile().""" + + def test_datafile_init(self): + """Test Datafile.__init__().""" + df = Datafile() + + assert not df.pid + assert not df.filename + assert not df.description + assert not df.restrict + + df = Datafile('tests/data/datafile.txt', 'doi:10.11587/EVMUHP') + + assert df.pid == 'doi:10.11587/EVMUHP' + assert df.filename == 'tests/data/datafile.txt' + assert not df.description + assert not df.restrict + + def test_datafile_set_dv_up(self, import_datafile_full_dict): + """Test Datafile.set() with format=`dv_up`. + + Parameters + ---------- + import_datafile_full_dict : dict + Fixture, which returns a flat datafile dict(). 
+ + """ + data = import_datafile_full_dict + df = Datafile() + df.set(data) + + assert df.pid == 'doi:10.11587/EVMUHP' + assert df.filename == 'tests/data/datafile.txt' + assert df.description == 'Test datafile' + assert not df.restrict + + def test_datafile_is_valid_valid(self, import_datafile_full_dict): + """Test Datafile.is_valid() with valid data. + + Parameters + ---------- + import_datafile_min_dict : dict + Fixture, which returns a flat datafile dict(). + + """ + data = import_datafile_full_dict + df = Datafile() + df.set(data) + + assert df.pid == 'doi:10.11587/EVMUHP' + assert df.filename == 'tests/data/datafile.txt' + assert df.description == 'Test datafile' + assert not df.restrict + assert df.is_valid() + + def test_datafile_is_valid_not(self, import_datafile_full_dict): + """Test Datafile.is_valid() with non-valid data. + + Parameters + ---------- + import_datafile_min_dict : dict + Fixture, which returns a flat datafile dict(). + + """ + data = import_datafile_full_dict + df = Datafile() + df.set(data) + df.filename = None + + assert df.pid == 'doi:10.11587/EVMUHP' + assert not df.filename + assert df.description == 'Test datafile' + assert not df.restrict + assert not df.is_valid() + + def test_datafile_dict_dv_up_valid(self, import_datafile_full_dict): + """Test Datafile.dict() with format=`dv_up` and valid data. + + Parameters + ---------- + import_datafile_min_dict : dict + Fixture, which returns a flat dataset dict(). + + """ + data = import_datafile_full_dict + df = Datafile() + df.set(data) + data = df.dict() + + assert df.dict('dv_up') + assert data + assert isinstance(data, dict) + assert data['pid'] == 'doi:10.11587/EVMUHP' + assert data['description'] == 'Test datafile' + print(data) + assert not data['restrict'] + + def test_datafile_dict_all_valid(self, import_datafile_full_dict): + """Test Datafile.dict() with format=`all` and valid data. + + Parameters + ---------- + import_datafile_full_dict : dict + Fixture, which returns a flat dataset dict(). + + """ + data = import_datafile_full_dict + df = Datafile() + df.set(data) + data = df.dict('all') + + assert data + assert isinstance(data, dict) + assert data['pid'] == 'doi:10.11587/EVMUHP' + assert data['filename'] == 'tests/data/datafile.txt' + assert data['description'] == 'Test datafile' + assert not data['restrict'] + + def test_datafile_dict_format_wrong(self, import_datafile_full_dict): + """Test Datafile.dict() with non-valid format. + + Parameters + ---------- + import_datafile_min_dict : dict + Fixture, which returns a flat dataset dict(). + + """ + data = import_datafile_full_dict + df = Datafile() + df.set(data) + data = df.dict('wrong') + + assert not data + + def test_datafile_dict_dv_up_valid_not(self, import_datafile_min_dict): + """Test Datafile.dict() with format=`dv_up` and non-valid data. + + Parameters + ---------- + import_datafile_min_dict : dict + Fixture, which returns a flat dataset dict(). + + """ + data = import_datafile_min_dict + df = Datafile() + df.set(data) + df.pid = None + + assert not df.is_valid() + assert df.filename == 'tests/data/datafile.txt' + + def test_datafile_json_dv_up_valid(self, import_datafile_min_dict): + """Test Datafile.json() with format=`dv_up` and valid data. + + Parameters + ---------- + import_datafile_min_dict : dict + Fixture, which returns a flat dataset dict(). 
+ + """ + data = import_datafile_min_dict + df = Datafile() + df.set(data) + data = df.json() + + assert data + assert isinstance(data, str) + + def test_datafile_json_dv_up_valid_not(self, import_datafile_min_dict): + """Test Datafile.json() with format=`dv_up` and non-valid data. + + Parameters + ---------- + import_datafile_min_dict : dict + Fixture, which returns a flat dataset dict(). + + """ + data = import_datafile_min_dict + df = Datafile() + df.set(data) + df.filename = None + + assert not df.is_valid() + print(df.json('dv_up')) + assert not df.json('dv_up') + + def test_datafile_json_all_valid(self, import_datafile_full_dict): + """Test Datafile.json() with format=`all` and valid data. + + Parameters + ---------- + import_datafile_full_dict : dict + Fixture, which returns a flat dataset dict(). + + """ + data = import_datafile_full_dict + df = Datafile() + df.set(data) + data = df.json('all') + + assert data + assert isinstance(data, str) + + def test_datafile_json_format_wrong_valid(self, import_datafile_min_dict): + """Test Datafile.json() with non-valid format and valid data. + + Parameters + ---------- + import_datafile_min_dict : dict + Fixture, which returns a flat dataset dict(). + + """ + data = import_datafile_min_dict + df = Datafile() + df.set(data) + data = df.json('wrong') + + assert not data From 90bb028ca83d5aa26fa7a4a87b9451e6177257e0 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 10 Jun 2019 21:58:17 +0200 Subject: [PATCH 35/46] remove json examples from models.py docstrings --- src/pyDataverse/models.py | 60 ++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py index 084723a..4ea6f09 100644 --- a/src/pyDataverse/models.py +++ b/src/pyDataverse/models.py @@ -105,23 +105,6 @@ def import_metadata(self, filename, format='dv_up'): Data must not be complete, and also attributes required for the metadata json export can be missing. - Example: Default dataverse metadata json: - { - "name": "Scientific Research", - "alias": "science", - "dataverseContacts": [ - { - "contactEmail": "pi@example.edu" - }, - { - "contactEmail": "student@example.edu" - } - ], - "affiliation": "Scientific Research University", - "description": "We do all the science.", - "dataverseType": "LABORATORY" - } - Parameters ---------- filename : string @@ -254,23 +237,6 @@ def dict(self, format='dv_up'): def json(self, format='dv_up'): r"""Create json from attributes. - Example: Default dataverse metadata json: - { - "name": "Scientific Research", - "alias": "science", - "dataverseContacts": [ - { - "contactEmail": "pi@example.edu" - }, - { - "contactEmail": "student@example.edu" - } - ], - "affiliation": "Scientific Research University", - "description": "We do all the science.", - "dataverseType": "LABORATORY" - } - Parameters ---------- format : string @@ -1219,7 +1185,31 @@ def export_metadata(self, filename, format='dv_up'): class Datafile(object): - """Base class for the Datafile model.""" + """Base class for the Datafile model. + + Parameters + ---------- + filename : string + Filename with full path. + pid : type + Description of parameter `pid` (the default is None). + + Attributes + ---------- + description : string + Description of datafile + restrict : bool + Unknown + __attr_required_metadata : list + List with required metadata. + __attr_valid_metadata : list + List with valid metadata for Dataverse api upload. + __attr_valid_class : list + List of all attributes. 
+ pid + filename + + """ """Attributes required for Datafile metadata json.""" __attr_required_metadata = [ From 9b928fe93ad50526d6fd71a8341dd38645cead96 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 10 Jun 2019 21:58:53 +0200 Subject: [PATCH 36/46] add models to sphinx documentation --- src/pyDataverse/docs/source/developer.rst | 7 +++++++ src/pyDataverse/docs/source/index.rst | 1 + 2 files changed, 8 insertions(+) diff --git a/src/pyDataverse/docs/source/developer.rst b/src/pyDataverse/docs/source/developer.rst index 05758ab..77102b0 100644 --- a/src/pyDataverse/docs/source/developer.rst +++ b/src/pyDataverse/docs/source/developer.rst @@ -14,6 +14,13 @@ Api Interface :members: +Models Interface +----------------------------- + +.. automodule:: pyDataverse.models + :members: + + Utils Interface ----------------------------- diff --git a/src/pyDataverse/docs/source/index.rst b/src/pyDataverse/docs/source/index.rst index a0d25ce..875b2ff 100644 --- a/src/pyDataverse/docs/source/index.rst +++ b/src/pyDataverse/docs/source/index.rst @@ -81,6 +81,7 @@ Features ----------------------------- - Dataverse Api functionalities to create, get, publish and delete Dataverses, Datasets and Datafiles. +- Dataverse data model for easy manipulation and data conversion. - Utils to support the core functionalities. - Custom exceptions - Tests on `Travis CI `_ (`pytest `_ + `tox `_). From 8f7d91e8881ac2d5e2c24b4b6ef1483320d6d8b2 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 10 Jun 2019 22:01:41 +0200 Subject: [PATCH 37/46] add models to readme.md --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 32c414a..1538242 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![PyPI](https://img.shields.io/pypi/v/pyDataverse.svg)](https://pypi.org/project/pyDataverse/) [![Build Status](https://travis-ci.com/AUSSDA/pyDataverse.svg?branch=master)](https://travis-ci.com/AUSSDA/pyDataverse) [![Coverage Status](https://coveralls.io/repos/github/AUSSDA/pyDataverse/badge.svg)](https://coveralls.io/github/AUSSDA/pyDataverse) [![Documentation Status](https://readthedocs.org/projects/pydataverse/badge/?version=latest)](https://pydataverse.readthedocs.io/en/latest) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pydataverse.svg) [![GitHub](https://img.shields.io/github/license/aussda/pydataverse.svg)](https://opensource.org/licenses/MIT) +[![PyPI](https://img.shields.io/pypi/v/pyDataverse.svg)](https://pypi.org/project/pyDataverse/) [![Build Status](https://travis-ci.com/AUSSDA/pyDataverse.svg?branch=master)](https://travis-ci.com/AUSSDA/pyDataverse) [![Coverage Status](https://coveralls.io/repos/github/AUSSDA/pyDataverse/badge.svg)](https://coveralls.io/github/AUSSDA/pyDataverse) [![Documentation Status](https://readthedocs.org/projects/pydataverse/badge/?version=latest)](https://pydataverse.readthedocs.io/en/latest) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pydataverse.svg) [![GitHub](https://img.shields.io/github/license/aussda/pydataverse.svg)](https://opensource.org/licenses/MIT) # pyDataverse @@ -8,10 +8,11 @@ pyDataverse is a Python module for [Dataverse](http://dataverse.org/). It uses t * Open Source ([MIT](https://opensource.org/licenses/MIT)) * `api.py`: Dataverse Api functionalities to create, get, publish and delete Dataverses, Datasets and Datafiles. +* `models.py`: Data model for easy manipulation and data conversion. * `utils.py`: Functions to support the core functionalities. 
* `exceptions.py`: Custom exceptions -* `tests/*`: Tests on [Travis CI](https://travis-ci.com/AUSSDA/pyDataverse) ([pytest](https://docs.pytest.org/en/latest/) + [tox](http://tox.readthedocs.io/)). -* [Documentation](https://pydataverse.readthedocs.io/en/latest/) +* `tests/*`: Tests, tested on [Travis CI](https://travis-ci.com/AUSSDA/pyDataverse) ([pytest](https://docs.pytest.org/en/latest/) + [tox](http://tox.readthedocs.io/)). +* [Documentation](https://pydataverse.readthedocs.io/en/latest/) (Sphinx, ReadTheDocs) * Python 2 and 3 (>=2.7) **Copyright** From e9ebea456bff177e141fa050e7de0010992fd072 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Tue, 11 Jun 2019 10:34:03 +0200 Subject: [PATCH 38/46] remove get_dataset_metadata from Api() --- src/pyDataverse/api.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index f0edbd6..c5eb807 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -750,29 +750,6 @@ def delete_dataset(self, identifier, auth=True): print('Dataset {} deleted'.format(identifier)) return resp - def get_dataset_metadata(self, identifier, auth=True): - """Get the metadatablocks of the fiven dataset. - - resp.status_code: - 200: metadata updated - - Parameters - ---------- - identifier : string - Doi of the dataset. e.g. `doi:10.11587/8H3N93`. - auth : bool - Should an api token be sent in the request. Defaults to `False`. - - Returns - ------- - dictionary - Metadata of given dataset - - """ - resp = self.get_dataset(identifier,auth=auth) - return resp.json()["data"]["latestVersion"]["metadataBlocks"]["citation"] - - def edit_dataset_metadata(self, identifier, metadata, is_replace=False, auth=True): """Edit metadata of a given dataset. `Offical documentation Date: Fri, 14 Jun 2019 18:37:22 +0200 Subject: [PATCH 39/46] update docstrings in Api() --- src/pyDataverse/api.py | 379 ++++++++++++++++++++++++++--------------- 1 file changed, 239 insertions(+), 140 deletions(-) diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index c5eb807..c7f5f1a 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -47,7 +47,7 @@ def __init__(self, base_url, api_token=None, api_version='v1'): """Init an Api() class. Scheme, host and path combined create the base-url for the api. - See more about url at https://en.wikipedia.org/wiki/URL + See more about URL at `Wikipedia `_. Parameters ---------- @@ -235,7 +235,7 @@ def post_request(self, query_str, metadata=None, auth=False, ) def put_request(self, query_str, metadata=None, auth=False, - params=None): + params=None): """Make a PUT request. Parameters @@ -335,17 +335,17 @@ def delete_request(self, query_str, auth=False, params=None): def get_dataverse(self, identifier, auth=False): """Get dataverse metadata by alias or id. - View data about the dataverse $identified by identifier. Identifier can - be the id number of the dataverse, its alias, or the special - value :root. + View metadata about a dataverse. - GET http://$SERVER/api/dataverses/$id + .. code-block:: bash + + GET http://$SERVER/api/dataverses/$id Parameters ---------- identifier : string - Can either be a dataverse id (long) or a dataverse alias (more - robust). + Can either be a dataverse id (long), a dataverse alias (more + robust), or the special value ``:root``. Returns ------- @@ -362,18 +362,20 @@ def create_dataverse(self, identifier, metadata, auth=True, """Create a dataverse. Generates a new dataverse under identifier. 
Expects a JSON content - describing the dataverse, as in the example below. If identifier is - omitted, a root dataverse is created. $id can either be a dataverse id - (long) or a dataverse alias (more robust). + describing the dataverse. + + HTTP Request: + + .. code-block:: bash - POST http://$SERVER/api/dataverses/$id?key=$apiKey + POST http://$SERVER/api/dataverses/$id - Download the JSON example file and modified to create dataverses to - suit your needs. The fields name, alias, and dataverseContacts are - required. http://guides.dataverse.org/en/latest/ - _downloads/dataverse-complete.json + Download the `dataverse.json `_ example file and modify to create + dataverses to suit your needs. The fields name, alias, and + dataverseContacts are required. - resp.status_code: + Status Codes: 200: dataverse created 201: dataverse created @@ -381,14 +383,14 @@ def create_dataverse(self, identifier, metadata, auth=True, ---------- identifier : string Can either be a dataverse id (long) or a dataverse alias (more - robust). + robust). If identifier is omitted, a root dataverse is created. metadata : string Metadata of the Dataverse as a json-formatted string. auth : bool - True if api authorization is necessary. Defaults to `True`. + True if api authorization is necessary. Defaults to ``True``. parent : string Parent dataverse, if existing, to which the Dataverse gets attached - to. Defaults to `:root`. + to. Defaults to ``:root``. Returns ------- @@ -426,10 +428,14 @@ def publish_dataverse(self, identifier, auth=True): Publish the Dataverse pointed by identifier, which can either by the dataverse alias or its numerical id. - POST http://$SERVER/api/dataverses/$identifier/actions/:publish + HTTP Request: + + .. code-block:: bash + + POST http://$SERVER/api/dataverses/$identifier/actions/:publish - resp.status_code: - 200: dataverse published + Status Code: + 200: Dataverse published Parameters ---------- @@ -437,7 +443,7 @@ def publish_dataverse(self, identifier, auth=True): Can either be a dataverse id (long) or a dataverse alias (more robust). auth : bool - True if api authorization is necessary. Defaults to `False`. + True if api authorization is necessary. Defaults to ``False``. Returns ------- @@ -474,11 +480,14 @@ def publish_dataverse(self, identifier, auth=True): def delete_dataverse(self, identifier, auth=True): """Delete dataverse by alias or id. - Deletes the dataverse whose ID is given: - DELETE http://$SERVER/api/dataverses/$id?key=$apiKey + HTTP Request: + + .. code-block:: bash + + DELETE http://$SERVER/api/dataverses/$id - resp.status_code: - 200: dataverse deleted + Status Code: + 200: Dataverse deleted Parameters ---------- @@ -523,23 +532,30 @@ def delete_dataverse(self, identifier, auth=True): print('Dataverse {} deleted.'.format(identifier)) return resp - def get_dataset(self, identifier, auth=True, is_doi=True): - """Get metadata of dataset. + def get_dataset(self, identifier, auth=True, is_pid=True): + """Get metadata of a Dataset. With Dataverse identifier: + + .. code-block:: bash + GET http://$SERVER/api/datasets/$identifier - With PID: - GET http://$SERVER/api/datasets/:persistentId/?persistentId=$ID + + With persistent identifier: + + .. code-block:: bash + + GET http://$SERVER/api/datasets/:persistentId/?persistentId=$id GET http://$SERVER/api/datasets/:persistentId/ - ?persistentId=doi:10.5072/FK2/J8SJZB + ?persistentId=$pid Parameters ---------- identifier : string - Doi of the dataset. e.g. `doi:10.11587/8H3N93`. - is_doi : bool - Is the identifier a Doi? 
Defauls to `True`. So far, the module only - supports Doi's as PID's. + Identifier of the dataset. Can be a Dataverse identifier or a + persistent identifier (e.g. ``doi:10.11587/8H3N93``). + is_pid : bool + True, if identifier is a persistent identifier. Returns ------- @@ -547,7 +563,7 @@ def get_dataset(self, identifier, auth=True, is_doi=True): Response object of requests library. """ - if is_doi: + if is_pid: query_str = '/datasets/:persistentId/?persistentId={0}'.format( identifier) else: @@ -555,22 +571,23 @@ def get_dataset(self, identifier, auth=True, is_doi=True): resp = self.get_request(query_str, auth=auth) return resp - def get_dataset_export(self, identifier, export_format): + def get_dataset_export(self, pid, export_format): """Get metadata of dataset exported in different formats. - CORS Export the metadata of the current published version of a dataset - in various formats: + Export the metadata of the current published version of a dataset + in various formats by its persistend identifier. + + .. code-block:: bash - GET http://$SERVER/api/datasets/ - export?exporter=ddi&persistentId=$persistentId + GET http://$SERVER/api/datasets/export?exporter=$exportformat&persistentId=$pid Parameters ---------- - identifier : string - Doi of the dataset. e.g. `doi:10.11587/8H3N93`. + pid : string + Persistent identifier of the dataset. (e.g. ``doi:10.11587/8H3N93``). export_format : string - Export format as a string. Formats: 'ddi', 'oai_ddi', 'dcterms', - 'oai_dc', 'schema.org', 'dataverse_json'. + Export format as a string. Formats: ``ddi``, ``oai_ddi``, + ``dcterms``, ``oai_dc``, ``schema.org``, ``dataverse_json``. Returns ------- @@ -579,47 +596,62 @@ def get_dataset_export(self, identifier, export_format): """ query_str = '/datasets/export?exporter={0}&persistentId={1}'.format( - export_format, identifier) + export_format, pid) resp = self.get_request(query_str) return resp def create_dataset(self, dataverse, metadata, auth=True): """Add dataset to a dataverse. - http://guides.dataverse.org/en/latest/api/native-api.html#create-a-dataset-in-a-dataverse + `Dataverse Documentation `_ + + HTTP Request: + + .. code-block:: bash + + POST http://$SERVER/api/dataverses/$dataverse/datasets --upload-file - POST http://$SERVER/api/dataverses/$dataverse/datasets --upload-file - FILENAME + Add new dataset with curl: + + .. code-block:: bash - curl -H "X-Dataverse-key: $API_TOKEN" -X POST $SERVER_URL/api/ - dataverses/$DV_ALIAS/datasets/:import?pid=$PERSISTENT_IDENTIFIER& - release=yes --upload-file dataset.json - curl -H "X-Dataverse-key: $API_TOKEN" -X POST $SERVER_URL/api/ - dataverses/$DV_ALIAS/datasets --upload-file dataset-finch1.json + curl -H "X-Dataverse-key: $API_TOKEN" -X POST $SERVER_URL/api/dataverses/$DV_ALIAS/datasets --upload-file tests/data/dataset_min.json + + Import dataset with existing persistend identifier with curl: + + .. code-block:: bash + + curl -H "X-Dataverse-key: $API_TOKEN" -X POST $SERVER_URL/api/dataverses/$DV_ALIAS/datasets/:import?pid=$PERSISTENT_IDENTIFIER&release=yes --upload-file tests/data/dataset_min.json To create a dataset, you must create a JSON file containing all the - metadata you want such as in this example file: dataset-finch1.json. + metadata you want such as example file: `dataset-finch1.json + `_. Then, you must decide which dataverse to create the dataset in and target that datavese with either the "alias" of the dataverse (e.g. "root" or the database id of the dataverse (e.g. "1"). 
The initial version state will be set to DRAFT: - http://guides.dataverse.org/en/latest/_downloads/dataset-finch1.json - resp.status_code: + Status Code: 201: dataset created Parameters ---------- dataverse : string - Alias of dataverse to which the dataset should be added to. + "alias" of the dataverse (e.g. ``root``) or the database id of the + dataverse (e.g. ``1``) metadata : string - Metadata of the Dataset as a json-formatted string. + Metadata of the Dataset as a json-formatted string (e. g. + `dataset-finch1.json `_ `) Returns ------- requests.Response Response object of requests library. + Todo + ------- + Link Dataset finch1.json + """ query_str = '/dataverses/{0}/datasets'.format(dataverse) resp = self.post_request(query_str, metadata, auth) @@ -640,7 +672,7 @@ def create_dataset(self, dataverse, metadata, auth=True): print('Dataset {} created.'.format(identifier)) return resp - def publish_dataset(self, identifier, type='minor', auth=True): + def publish_dataset(self, pid, type='minor', auth=True): """Publish dataset. Publishes the dataset whose id is passed. If this is the first version @@ -652,7 +684,11 @@ def publish_dataset(self, identifier, type='minor', auth=True): type=updatecurrent to update metadata without changing the version number. - POST http://$SERVER/api/datasets/$id/actions/:publish?type=$type + HTTP Request: + + .. code-block:: bash + + POST http://$SERVER/api/datasets/$id/actions/:publish?type=$type When there are no default workflows, a successful publication process will result in 200 OK response. When there are workflows, it is @@ -663,21 +699,22 @@ def publish_dataset(self, identifier, type='minor', auth=True): has to check the status of the dataset periodically, or perform some push request in the post-publish workflow. - resp.status_code: + Status Code: 200: dataset published Parameters ---------- - identifier : string - Doi of the dataset. e.g. `doi:10.11587/8H3N93`. + pid : string + Persistent identifier of the dataset (e.g. + ``doi:10.11587/8H3N93``). type : string Passing `minor` increases the minor version number (2.3 is - updated to 2.4). - Passing `major` increases the major version number (2.3 is - updated to 3.0). Superusers can pass `updatecurrent` to update - metadata without changing the version number: + updated to 2.4). Passing `major` increases the major version + number (2.3 is updated to 3.0). Superusers can pass + ``updatecurrent` to update metadata without changing the version + number. auth : bool - True if api authorization is necessary. Defaults to `False`. + ``True`` if api authorization is necessary. Defaults to ``False``. Returns ------- @@ -703,19 +740,27 @@ def publish_dataset(self, identifier, type='minor', auth=True): print('Dataset {} published'.format(identifier)) return resp - def delete_dataset(self, identifier, auth=True): + def delete_dataset(self, identifier, is_pid=True, auth=True): """Delete a dataset. - Delete the dataset whose id is passed: - DELETE http://$SERVER/api/datasets/$id?key=$apiKey + Delete the dataset whose id is passed - resp.status_code: + HTTP Request: + + .. code-block:: bash + + DELETE http://$SERVER/api/datasets/$id + + Status Code: 200: dataset deleted Parameters ---------- identifier : string - Doi of the dataset. e.g. `doi:10.11587/8H3N93`. + Identifier of the dataset. Can be a Dataverse identifier or a + persistent identifier (e.g. ``doi:10.11587/8H3N93``). + is_pid : bool + True, if identifier is a persistent identifier. 
Returns ------- @@ -723,8 +768,11 @@ def delete_dataset(self, identifier, auth=True): Response object of requests library. """ - query_str = '/datasets/:persistentId/?persistentId={0}'.format( - identifier) + if is_pid: + query_str = '/datasets/:persistentId/?persistentId={0}'.format( + identifier) + else: + query_str = '/datasets/{0}'.format(identifier) resp = self.delete_request(query_str, auth=auth) if resp.status_code == 404: @@ -750,11 +798,16 @@ def delete_dataset(self, identifier, auth=True): print('Dataset {} deleted'.format(identifier)) return resp - def edit_dataset_metadata(self, identifier, metadata, is_replace=False, auth=True): - """Edit metadata of a given dataset. `Offical documentation + def edit_dataset_metadata(self, identifier, metadata, is_pid=True, + is_replace=False, auth=True): + """Edit metadata of a given dataset. + + `Offical documentation `_. + HTTP Request: + .. code-block:: bash PUT http://$SERVER/api/datasets/editMetadata/$id --upload-file FILENAME @@ -762,56 +815,52 @@ def edit_dataset_metadata(self, identifier, metadata, is_replace=False, auth=Tru Add data to dataset fields that are blank or accept multiple values with the following + CURL Request: .. code-block:: bash - curl -H "X-Dataverse-key: $API_TOKEN" -X PUT $SERVER_URL/api/datasets/:persistentId/editMetadata/?persistentId=$PID --upload-file dataset-add-metadata.json + curl -H "X-Dataverse-key: $API_TOKEN" -X PUT $SERVER_URL/api/datasets/:persistentId/editMetadata/?persistentId=$pid --upload-file dataset-add-metadata.json For these edits your JSON file need only include those dataset fields which you would like to edit. A sample JSON file may be downloaded here: `dataset-edit-metadata-sample.json `_ - As an example, one could first get and save the metadate of a dataset - - .. code-block:: - - data = api.get_dataset_metadata(DOI,auth=True) - utils.write_file_json(fileName,data) - - Make changes to the file and then update the metadata in dataverse - - .. code-block:: - - data = utils.dict_to_json(utils.read_file_json(fileName)) - resp = api.edit_dataset_metadata(DOI,data,is_replace=True,auth=True) - - - resp.status_code: - 200: metadata updated - Parameters ---------- identifier : string - Doi of the dataset. e.g. `doi:10.11587/8H3N93`. + Identifier of the dataset. Can be a Dataverse identifier or a + persistent identifier (e.g. ``doi:10.11587/8H3N93``). metadata : string Metadata of the Dataset as a json-formatted string. + is_pid : bool + ``True`` to use persistent identifier. ``False``, if not. is_replace : bool - True to replace already existing metadata. + ``True`` to replace already existing metadata. ``False``, if not. auth : bool - Should an api token be sent in the request. Defaults to `False`. + ``True``, if an api token should be sent. Defaults to ``False``. Returns ------- requests.Response Response object of requests library. 
- """ + Examples + ------- + Get dataset metadata:: + + >>> data = api.get_dataset_metadata(doi, auth=True) + >>> resp = api.edit_dataset_metadata(doi, data, is_replace=True, auth=True) + >>> resp.status_code + 200: metadata updated - query_str = '/datasets/:persistentId/editMetadata/?persistentId={0}'.format( - identifier) + """ + if is_pid: + query_str = '/datasets/:persistentId/editMetadata/?persistentId={0}' + ''.format(identifier) + else: + query_str = '/datasets/editMetadata/{0}'.format(identifier) params = {'replace': True} if is_replace else {} - #if is_replace: query_str += "&replace=true" resp = self.put_request(query_str, metadata, auth, params) @@ -829,21 +878,24 @@ def edit_dataset_metadata(self, identifier, metadata, is_replace=False, auth=Tru 'and does not allow multiples. ' + 'Use is_replace=true to replace existing data.') elif resp.status_code == 200: - # time = resp.json()['data']['lastUpdateTime'] - print('Dataset updated')# - {}.'.format(time)) + print('Dataset {0} updated'.format(identifier)) return resp - def get_datafiles(self, doi, version='1'): + def get_datafiles(self, pid, version='1'): """List metadata of all datafiles of a dataset. - http://guides.dataverse.org/en/latest/api/native-api.html#list-files-in-a-dataset - GET http://$SERVER/api/datasets/$id/versions/$versionId/ - files?key=$apiKey + `Documentation `_ + + HTTP Request: + + .. code-block:: bash + + GET http://$SERVER/api/datasets/$id/versions/$versionId/files Parameters ---------- - doi : string - Doi of the dataset. e.g. `doi:10.11587/8H3N93`. + pid : string + Persistent identifier of the dataset. e.g. ``doi:10.11587/8H3N93``. version : string Version of dataset. Defaults to `1`. @@ -855,23 +907,32 @@ def get_datafiles(self, doi, version='1'): """ base_str = '/datasets/:persistentId/versions/' query_str = base_str + '{0}/files?persistentId={1}'.format( - version, doi) + version, pid) resp = self.get_request(query_str) return resp - def get_datafile(self, identifier): + def get_datafile(self, identifier, is_pid=True): """Download a datafile via the Dataverse Data Access API. - File ID + Get by file id (HTTP Request). + + .. code-block:: bash + GET /api/access/datafile/$id - DOI - GET http://$SERVER/api/access/datafile/ - :persistentId/?persistentId=doi:10.5072/FK2/J8SJZB + + Get by persistent identifier (HTTP Request). + + .. code-block:: bash + + GET http://$SERVER/api/access/datafile/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB Parameters ---------- identifier : string - Doi of the dataset. e.g. `doi:10.11587/8H3N93`. + Identifier of the dataset. Can be datafile id or persistent + identifier of the datafile (e. g. doi). + is_pid : bool + ``True`` to use persistent identifier. ``False``, if not. Returns ------- @@ -879,14 +940,22 @@ def get_datafile(self, identifier): Response object of requests library. """ - query_str = '/access/datafile/{0}'.format(identifier) + if is_pid: + query_str = '/access/datafile/{0}'.format(identifier) + else: + query_str = '/access/datafile/:persistentId/?persistentId={0}' + ''.format(identifier) resp = self.get_request(query_str) return resp def get_datafile_bundle(self, identifier): - """Download a datafile in all its formats via the Dataverse Data Access API. + """Download a datafile in all its formats. - GET /api/access/datafile/bundle/$id + HTTP Request: + + .. code-block:: bash + + GET /api/access/datafile/bundle/$id Data Access API calls can now be made using persistent identifiers (in addition to database ids). 
This is done by passing the constant @@ -907,7 +976,7 @@ def get_datafile_bundle(self, identifier): Parameters ---------- identifier : string - Doi of the dataset. e.g. `doi:10.11587/8H3N93`. + Identifier of the dataset. Returns ------- @@ -919,22 +988,29 @@ def get_datafile_bundle(self, identifier): data = self.get_request(query_str) return data - def upload_file(self, identifier, filename): + def upload_file(self, identifier, filename, is_pid=True): """Add file to a dataset. Add a file to an existing Dataset. Description and tags are optional: - POST http://$SERVER/api/datasets/$id/add?key=$apiKey + + HTTP Request: + + .. code-block:: bash + + POST http://$SERVER/api/datasets/$id/add The upload endpoint checks the content of the file, compares it with existing files and tells if already in the database (most likely via - hashing) + hashing). Parameters ---------- identifier : string - Doi of the dataset. e.g. `doi:10.11587/8H3N93`. + Identifier of the dataset. filename : string Full filename with path. + is_pid : bool + ``True`` to use persistent identifier. ``False``, if not. Returns ------- @@ -944,8 +1020,11 @@ def upload_file(self, identifier, filename): """ query_str = self.native_api_base_url - query_str += '/datasets/:persistentId/add?persistentId={0}'.format( - identifier) + if is_pid: + query_str += '/datasets/:persistentId/add?persistentId={0}'.format( + identifier) + else: + query_str += '/datasets/{0}/add'.format(identifier) shell_command = 'curl -H "X-Dataverse-key: {0}"'.format( self.api_token) shell_command += ' -X POST {0} -F file=@{1}'.format( @@ -958,10 +1037,14 @@ def upload_file(self, identifier, filename): def get_info_version(self): """Get the Dataverse version and build number. - The response contains the version and build numbers. + The response contains the version and build numbers. Requires no api + token. + + HTTP Request: + + .. code-block:: bash - Requires no api_token - GET http://$SERVER/api/info/version + GET http://$SERVER/api/info/version Returns ------- @@ -976,10 +1059,14 @@ def get_info_version(self): def get_info_server(self): """Get dataverse server name. - This is useful when a Dataverse system is - composed of multiple Java EE servers behind a load balancer. + This is useful when a Dataverse system is composed of multiple Java EE + servers behind a load balancer. + + HTTP Request: + + .. code-block:: bash - GET http://$SERVER/api/info/server + GET http://$SERVER/api/info/server Returns ------- @@ -997,7 +1084,11 @@ def get_info_apiTermsOfUse(self): The response contains the text value inserted as API Terms of use which uses the database setting :ApiTermsOfUse. - GET http://$SERVER/api/info/apiTermsOfUse + HTTP Request: + + .. code-block:: bash + + GET http://$SERVER/api/info/apiTermsOfUse Returns ------- @@ -1014,7 +1105,11 @@ def get_metadatablocks(self): Lists brief info about all metadata blocks registered in the system. - GET http://$SERVER/api/metadatablocks + HTTP Request: + + .. code-block:: bash + + GET http://$SERVER/api/metadatablocks Returns ------- @@ -1032,7 +1127,11 @@ def get_metadatablock(self, identifier): Returns data about the block whose identifier is passed. identifier can either be the block’s id, or its name. - GET http://$SERVER/api/metadatablocks/$identifier + HTTP Request: + + .. 
code-block:: bash + + GET http://$SERVER/api/metadatablocks/$identifier Parameters ---------- From 9efb68fd43e265d0bbc0469b97c73d51314ce831 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Fri, 14 Jun 2019 18:37:48 +0200 Subject: [PATCH 40/46] update docstrings in utils.py --- src/pyDataverse/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pyDataverse/utils.py b/src/pyDataverse/utils.py index 0fb7bd3..9babe88 100644 --- a/src/pyDataverse/utils.py +++ b/src/pyDataverse/utils.py @@ -144,6 +144,10 @@ def write_file_json(filename, data, mode='w'): def csv_to_dict(filename): """Read in csv file and convert it into a list of dicts. + Header row is used to create keys for each row. So if column attributes are + named after Dataverse metadata names, the created dict can directly be used + to set Dataset attributes via `Dataset.set(data)`. + Parameters ---------- filename : string From 9d503a438b863cbcb44884159d93097e180dcd64 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Fri, 14 Jun 2019 18:59:10 +0200 Subject: [PATCH 41/46] fix doc warning in api.py; fix coveralls issue in tox.ini --- src/pyDataverse/api.py | 13 +++++++------ src/pyDataverse/utils.py | 18 +++++++++++++----- tox.ini | 14 +++++++------- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index c7f5f1a..2bebc05 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -603,13 +603,14 @@ def get_dataset_export(self, pid, export_format): def create_dataset(self, dataverse, metadata, auth=True): """Add dataset to a dataverse. - `Dataverse Documentation `_ + `Dataverse Documentation + `_ HTTP Request: .. code-block:: bash - POST http://$SERVER/api/dataverses/$dataverse/datasets --upload-file + POST http://$SERVER/api/dataverses/$dataverse/datasets --upload-file FILENAME Add new dataset with curl: @@ -641,7 +642,7 @@ def create_dataset(self, dataverse, metadata, auth=True): dataverse (e.g. ``1``) metadata : string Metadata of the Dataset as a json-formatted string (e. g. - `dataset-finch1.json `_ `) + `dataset-finch1.json `_) Returns ------- @@ -708,10 +709,10 @@ def publish_dataset(self, pid, type='minor', auth=True): Persistent identifier of the dataset (e.g. ``doi:10.11587/8H3N93``). type : string - Passing `minor` increases the minor version number (2.3 is - updated to 2.4). Passing `major` increases the major version + Passing ``minor`` increases the minor version number (2.3 is + updated to 2.4). Passing ``major`` increases the major version number (2.3 is updated to 3.0). Superusers can pass - ``updatecurrent` to update metadata without changing the version + ``updatecurrent`` to update metadata without changing the version number. auth : bool ``True`` if api authorization is necessary. Defaults to ``False``. diff --git a/src/pyDataverse/utils.py b/src/pyDataverse/utils.py index 9babe88..168f062 100644 --- a/src/pyDataverse/utils.py +++ b/src/pyDataverse/utils.py @@ -144,9 +144,15 @@ def write_file_json(filename, data, mode='w'): def csv_to_dict(filename): """Read in csv file and convert it into a list of dicts. - Header row is used to create keys for each row. So if column attributes are - named after Dataverse metadata names, the created dict can directly be used - to set Dataset attributes via `Dataset.set(data)`. + This offers an easy import functionality of csv files with dataset metadata. + + Assumptions: + 1) The header rows contains the column names, named after Dataverse's + dataset attribute standard naming convention. 
+    2) One row contains one dataset.
+
+    After the import, the created dict then can directly be used to set
+    Dataset() attributes via ``Dataset.set(data)``.
 
     Parameters
     ----------
@@ -155,8 +161,10 @@ def csv_to_dict(filename):
 
     Returns
     -------
-    dict
-        List with dicts for each row.
+    list
+        List with one dict per row (=dataset). The keys of the dicts are named
+        after the column names, which must follow the Dataverse dataset
+        metadata naming convention.
 
     """
     reader = csv.DictReader(open(filename), 'r')
diff --git a/tox.ini b/tox.ini
index 584500a..74e1b7f 100644
--- a/tox.ini
+++ b/tox.ini
@@ -14,45 +14,45 @@ basepython = python3.5
 deps =
     -r{toxinidir}/tools/tests-requirements.txt
 commands =
-    pytest tests/ --cov=src/pyDataverse --basetemp={envtmpdir}
+    pytest tests/ --cov=pyDataverse --basetemp={envtmpdir}
 
 [testenv:py34]
 deps =
     -r{toxinidir}/tools/tests-requirements.txt
 commands =
-    pytest tests/ --cov=src/pyDataverse --basetemp={envtmpdir}
+    pytest tests/ --cov=pyDataverse --basetemp={envtmpdir}
 
 [testenv:py35]
 deps =
     -r{toxinidir}/tools/tests-requirements.txt
 commands =
-    pytest tests/ --cov=src/pyDataverse --basetemp={envtmpdir}
+    pytest tests/ --cov=pyDataverse --basetemp={envtmpdir}
 
 [testenv:py36]
 deps =
     -r{toxinidir}/tools/tests-requirements.txt
 commands =
-    pytest tests/ --cov=src/pyDataverse --basetemp={envtmpdir}
+    pytest tests/ --cov=pyDataverse --basetemp={envtmpdir}
 
 [testenv:py37]
 deps =
     -r{toxinidir}/tools/tests-requirements.txt
 commands =
-    pytest tests/ --cov=src/pyDataverse --basetemp={envtmpdir}
+    pytest tests/ --cov=pyDataverse --basetemp={envtmpdir}
 
 [testenv:coverage]
 description = create report for coverage
 deps =
     -r{toxinidir}/tools/tests-requirements.txt
 commands =
-    pytest tests/ --cov=src/pyDataverse --cov-report=term-missing --cov-report=xml --cov-report=html
+    pytest tests/ --cov=pyDataverse --cov-report=term-missing --cov-report=xml --cov-report=html
 
 [testenv:coveralls]
 description = create reports for coveralls
 deps =
     -r{toxinidir}/tools/tests-requirements.txt
 commands =
-    pytest tests/ --doctest-modules -v --cov=src/pyDataverse
+    pytest tests/ --doctest-modules -v --cov=pyDataverse
 
 [flake8]
 max-line-length = 80

From 7ef6acbc291344514666d6eb256a104a018a1411 Mon Sep 17 00:00:00 2001
From: Stefan Kasberger 
Date: Mon, 17 Jun 2019 18:56:12 -0400
Subject: [PATCH 42/46] add read_file_csv function to utils

---
 src/pyDataverse/utils.py | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/pyDataverse/utils.py b/src/pyDataverse/utils.py
index 168f062..2e787fa 100644
--- a/src/pyDataverse/utils.py
+++ b/src/pyDataverse/utils.py
@@ -141,7 +141,32 @@ def write_file_json(filename, data, mode='w'):
     write_file(filename, dict_to_json(data), mode)
 
 
-def csv_to_dict(filename):
+def read_file_csv(filename):
+    """Read in CSV file.
+
+    See more at `csv.reader() `_.
+
+    Parameters
+    ----------
+    filename : string
+        Full filename with path of file.
+
+    Returns
+    -------
+    list
+        List of rows, each row a list of the column values.
+
+    """
+    with open(filename, newline='') as csvfile:
+        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
+        # Consume the reader while the file is still open; returning the
+        # reader object itself would fail, because the file handle is
+        # closed as soon as the with-block is left.
+        rows = list(reader)
+    return rows
+
+
+def read_csv_to_dict(filename):
     """Read in csv file and convert it into a list of dicts.
 
     This offers an easy import functionality of csv files with dataset metadata.
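The CSV helpers above are intended to feed the metadata model. A minimal sketch of that workflow, assuming a file `datasets.csv` whose header row uses the Dataverse dataset attribute names understood by `Dataset.set()` (the file name and its columns are illustrative, not part of the patches):

    from pyDataverse.models import Dataset
    from pyDataverse.utils import read_csv_to_dict

    # One dict per CSV row; keys come from the header row and are assumed
    # to follow the Dataverse dataset attribute naming convention.
    for row in read_csv_to_dict('datasets.csv'):
        ds = Dataset()
        ds.set(row)                  # map the flat dict onto Dataset attributes
        if ds.is_valid():            # required metadata attributes present?
            print(ds.json('dv_up'))  # JSON for the Dataverse native API upload
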
From 7ef7b18804445a6dedd66f701cc27c3c89e3048d Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 17 Jun 2019 19:13:30 -0400 Subject: [PATCH 43/46] minor change in docstring --- src/pyDataverse/models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pyDataverse/models.py b/src/pyDataverse/models.py index 4ea6f09..f5fead2 100644 --- a/src/pyDataverse/models.py +++ b/src/pyDataverse/models.py @@ -770,8 +770,7 @@ def __parse_dicts(self, data, attr_list): return data_tmp def is_valid(self): - """Checks if attributes available are valid for Dataverse api metadata - creation. + """Check if attributes available are valid for Dataverse api metadata creation. The attributes required are listed in `__attr_required_metadata`. From 509524dbb9359a177eae2514affb6fc7a485d57a Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 17 Jun 2019 19:22:49 -0400 Subject: [PATCH 44/46] update pyDataverse version to v0.2.0; update docs --- HISTORY.md | 7 +++++- src/pyDataverse/__init__.py | 4 ++-- src/pyDataverse/docs/source/developer.rst | 25 +++++++++++----------- src/pyDataverse/docs/source/index.rst | 26 ++++++++++++----------- 4 files changed, 34 insertions(+), 28 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 429c989..f6577fb 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,11 +1,16 @@ .. :changelog: +0.2.0 - (2019-06-17) - Ida Pfeiffer +------------------------------------ + +[Release](https://github.com/AUSSDA/pyDataverse/releases/tag/v0.2.0) + 0.1.1 - (2019-05-28) ------------------------------------ [Release](https://github.com/AUSSDA/pyDataverse/releases/tag/v0.1.1) -0.1.0 - Marietta Blau (2019-05-20) +0.1.0 - (2019-05-20) - Marietta Blau ------------------------------------ [Release](https://github.com/AUSSDA/pyDataverse/releases/tag/v0.1.0) diff --git a/src/pyDataverse/__init__.py b/src/pyDataverse/__init__.py index a0084dc..9a9f0cc 100644 --- a/src/pyDataverse/__init__.py +++ b/src/pyDataverse/__init__.py @@ -16,7 +16,7 @@ __email__ = 'stefan.kasberger@univie.ac.at' __copyright__ = 'Copyright (c) 2019 Stefan Kasberger' __license__ = 'MIT License' -__version__ = '0.1.1' +__version__ = '0.2.0' __url__ = 'https://github.com/AUSSDA/pyDataverse' __download_url__ = 'https://pypi.python.org/pypi/pyDataverse' -__description__ = 'A Python wrapper around the Dataverse API' +__description__ = 'A Python module for Dataverse.' diff --git a/src/pyDataverse/docs/source/developer.rst b/src/pyDataverse/docs/source/developer.rst index 77102b0..a7532c1 100644 --- a/src/pyDataverse/docs/source/developer.rst +++ b/src/pyDataverse/docs/source/developer.rst @@ -95,19 +95,6 @@ When you only want to run one test, e.g. the py36 test: To find out more about which tests are available, have a look inside the tox.ini file. - -Documentation ------------------------------ - - -**Create Sphinx Docs** - -Use Sphinx to create class and function documentation out of the doc-strings. You can call it via `tox`. This creates the created docs inside `docs/build`. - -.. code-block:: shell - - tox -e docs - **Create Coverage Reports** Run tests with coverage to create html and xml reports as an output. Again, call it via `tox`. This creates the created docs inside `docs/coverage_html/`. @@ -123,3 +110,15 @@ To use Coveralls on local development: .. code-block:: shell tox -e coveralls + +Documentation +----------------------------- + + +**Create Sphinx Docs** + +Use Sphinx to create class and function documentation out of the doc-strings. You can call it via `tox`. 
This creates the created docs inside `docs/build`. + +.. code-block:: shell + + tox -e docs diff --git a/src/pyDataverse/docs/source/index.rst b/src/pyDataverse/docs/source/index.rst index 875b2ff..e4ea41c 100644 --- a/src/pyDataverse/docs/source/index.rst +++ b/src/pyDataverse/docs/source/index.rst @@ -27,9 +27,9 @@ Release v\ |version|. ------------------- pyDataverse is a Python module for `Dataverse `_. -It uses the `Native API `_ -and `Data Access API `_ -to create, update and remove Dataverses, Datasets and Datafiles. +It uses the `Dataverse API `_ +and it's metadata model to import, manipulate and export Dataverses, Datasets +and Datafiles. ------------------- @@ -46,22 +46,24 @@ Quickstart **Usage** >>> from pyDataverse.api import Api +>>> from pyDataverse.models import Dataverse >>> # establish connection ->>> base_url = 'http://demo.dataverse.org' +>>> base_url = 'https://data.aussda.at/' >>> api = Api(base_url) >>> api.status 'OK' >>> # get dataverse ->>> dv = 'ecastro' # dataverse alias or id +>>> dv = 'autnes' # dataverse alias or id >>> resp = api.get_dataverse(dv) >>> resp.json()['data']['creationDate'] -'2015-04-20T09:29:39Z' +'2017-11-09T13:53:27Z' >>> # get dataset +>>> identifier = 'doi:10.11587/IMKDZI' >>> resp = api.get_dataset(identifier) >>> resp.json()['data']['id'] -24 +345 >>> # get datafile ->>> datafile_id = '32' # file id of the datafile +>>> datafile_id = '399' # file id of the datafile >>> resp = api.get_datafile(datafile_id) >>> resp @@ -80,10 +82,10 @@ External packages: Features ----------------------------- -- Dataverse Api functionalities to create, get, publish and delete Dataverses, Datasets and Datafiles. -- Dataverse data model for easy manipulation and data conversion. -- Utils to support the core functionalities. -- Custom exceptions +- Dataverse Api functionalities to create, get, publish and delete Dataverses, Datasets and Datafiles of your Dataverse instance via Api. +- Dataverse metadata model for easy manipulation and data conversion from and to other formats (e. g. Dataverse Api metadata JSON). +- Utils to support core functionalities. +- Custom exceptions. - Tests on `Travis CI `_ (`pytest `_ + `tox `_). - Open Source (`MIT `_) From fa8312c4018a56b4ac3aeb9fed58e6beac39f8b5 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 17 Jun 2019 19:32:12 -0400 Subject: [PATCH 45/46] update README.md features for PyPI --- README.md | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 1538242..cbb5b6b 100644 --- a/README.md +++ b/README.md @@ -2,18 +2,21 @@ # pyDataverse -pyDataverse is a Python module for [Dataverse](http://dataverse.org/). It uses the [Native API](http://guides.dataverse.org/en/latest/api/native-api.html) and [Data Access API](http://guides.dataverse.org/en/latest/api/dataaccess.html). It allows to create, update and remove Dataverses, Datasets and Datafiles via Dataverse's native API. Thanks to the developers of [dataverse-client-python](https://github.com/IQSS/dataverse-client-python), from which the project got inspired from. +pyDataverse is a Python module for [Dataverse](http://dataverse.org). +It uses the [Dataverse API](http://guides.dataverse.org/en/latest/api/index.html) +and it's metadata model to import, manipulate and export Dataverses, Datasets +and Datafiles. **Features** -* Open Source ([MIT](https://opensource.org/licenses/MIT)) -* `api.py`: Dataverse Api functionalities to create, get, publish and delete Dataverses, Datasets and Datafiles. 
-* `models.py`: Data model for easy manipulation and data conversion. -* `utils.py`: Functions to support the core functionalities. -* `exceptions.py`: Custom exceptions -* `tests/*`: Tests, tested on [Travis CI](https://travis-ci.com/AUSSDA/pyDataverse) ([pytest](https://docs.pytest.org/en/latest/) + [tox](http://tox.readthedocs.io/)). +* Dataverse Api functionalities to create, get, publish and delete Dataverses, Datasets and Datafiles of your Dataverse instance via Api. +* Dataverse metadata model for easy manipulation and data conversion from and to other formats (e. g. Dataverse Api metadata JSON). +* Utils to support core functionalities. +* Custom exceptions +* Tests on [Travis CI](https://travis-ci.com/AUSSDA/pyDataverse) ([pytest](https://docs.pytest.org/en/latest/) + [tox](http://tox.readthedocs.io/)). * [Documentation](https://pydataverse.readthedocs.io/en/latest/) (Sphinx, ReadTheDocs) * Python 2 and 3 (>=2.7) +* Open Source ([MIT](https://opensource.org/licenses/MIT)) **Copyright** From 6234e352160ce873d893038fbc50c18f17c87856 Mon Sep 17 00:00:00 2001 From: Stefan Kasberger Date: Mon, 17 Jun 2019 22:34:04 -0400 Subject: [PATCH 46/46] minor change in documentation --- README.md | 2 +- src/pyDataverse/docs/source/index.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cbb5b6b..3c917f1 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ pyDataverse is a Python module for [Dataverse](http://dataverse.org). It uses the [Dataverse API](http://guides.dataverse.org/en/latest/api/index.html) -and it's metadata model to import, manipulate and export Dataverses, Datasets +and it's metadata data model to import, manipulate and export Dataverses, Datasets and Datafiles. **Features** diff --git a/src/pyDataverse/docs/source/index.rst b/src/pyDataverse/docs/source/index.rst index e4ea41c..7fbbf80 100644 --- a/src/pyDataverse/docs/source/index.rst +++ b/src/pyDataverse/docs/source/index.rst @@ -28,7 +28,7 @@ Release v\ |version|. pyDataverse is a Python module for `Dataverse `_. It uses the `Dataverse API `_ -and it's metadata model to import, manipulate and export Dataverses, Datasets +and it's metadata data model to import, manipulate and export Dataverses, Datasets and Datafiles. -------------------
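
Taken together, the patches in this series give pyDataverse 0.2.0 its shape: renamed request helpers, persistent-identifier handling via `is_pid`, and the metadata model classes. A rough end-to-end sketch of how these pieces are meant to interact (base URL, API token, dataverse alias and file paths below are placeholders, not values taken from the patches):

    from pyDataverse.api import Api
    from pyDataverse.models import Datafile, Dataset

    # Placeholder installation and token; any Dataverse instance works.
    api = Api('https://demo.dataverse.org', api_token='xxxx-xxxx')

    ds = Dataset()
    ds.import_metadata('tests/data/dataset_full.json')  # Dataverse upload JSON -> attributes
    if ds.is_valid():
        resp = api.create_dataset('science', ds.json('dv_up'))  # 'science' = dataverse alias
        pid = resp.json()['data']['persistentId']

        # Attach a datafile to the new dataset via its persistent identifier.
        df = Datafile('tests/data/datafile.txt', pid)
        api.upload_file(pid, df.filename, is_pid=True)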