diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index c30ab3c..b344e0d 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -122,7 +122,7 @@ def get_request(self, url, params=None, auth=False): try: url = urljoin(self.base_url_api, url) - resp = httpx.get(url, params=params) + resp = httpx.get(url, params=params, follow_redirects=True) if resp.status_code == 401: error_msg = resp.json()["message"] raise ApiAuthorizationError( @@ -175,7 +175,7 @@ def post_request(self, url, data=None, auth=False, params=None, files=None): params["key"] = self.api_token try: - resp = httpx.post(url, data=data, params=params, files=files) + resp = httpx.post(url, data=data, params=params, files=files, follow_redirects=True) if resp.status_code == 401: error_msg = resp.json()["message"] raise ApiAuthorizationError( @@ -216,7 +216,7 @@ def put_request(self, url, data=None, auth=False, params=None): params["key"] = self.api_token try: - resp = httpx.put(url, data=data, params=params) + resp = httpx.put(url, data=data, params=params, follow_redirects=True) if resp.status_code == 401: error_msg = resp.json()["message"] raise ApiAuthorizationError( @@ -255,7 +255,7 @@ def delete_request(self, url, auth=False, params=None): params["key"] = self.api_token try: - return httpx.delete(url, params=params) + return httpx.delete(url, params=params, follow_redirects=True) except ConnectError: raise ConnectError( "ERROR: DELETE could not establish connection to api {}.".format(url) @@ -338,13 +338,13 @@ def get_datafile( """ is_first_param = True if is_pid: - url = "{0}/datafile/{1}".format(self.base_url_api_data_access, identifier) - if data_format or no_var_header or image_thumb: - url += "?" - else: url = "{0}/datafile/:persistentId/?persistentId={1}".format( self.base_url_api_data_access, identifier ) + else: + url = "{0}/datafile/{1}".format(self.base_url_api_data_access, identifier) + if data_format or no_var_header or image_thumb: + url += "?" 
if data_format: url += "format={0}".format(data_format) is_first_param = False diff --git a/tests/api/test_access.py b/tests/api/test_access.py new file mode 100644 index 0000000..25061cd --- /dev/null +++ b/tests/api/test_access.py @@ -0,0 +1,146 @@ +import os +import json +import httpx + +from pyDataverse.api import DataAccessApi, NativeApi + +class TestDataAccess: + + def test_get_data_by_id(self): + """Tests getting data file by id.""" + + # Arrange + BASE_URL = os.getenv("BASE_URL").rstrip("/") + API_TOKEN = os.getenv("API_TOKEN") + + assert BASE_URL is not None, "BASE_URL is not set" + assert API_TOKEN is not None, "API_TOKEN is not set" + + # Create dataset + metadata = json.load(open("tests/data/file_upload_ds_minimum.json")) + pid = self._create_dataset(BASE_URL, API_TOKEN, metadata) + api = DataAccessApi(BASE_URL, API_TOKEN) + + # Upload a file + self._upload_datafile(BASE_URL, API_TOKEN, pid) + + # Retrieve the file ID + file_id = self._get_file_id(BASE_URL, API_TOKEN, pid) + + # Act + response = api.get_datafile(file_id, is_pid=False) + response.raise_for_status() + content = response.content.decode("utf-8") + + # Assert + expected = open("tests/data/datafile.txt").read() + assert content == expected, "Data retrieval failed." + + def test_get_data_by_pid(self): + """Tests getting data file by PID. + + Test runs against Harvard Dataverse with a PID instead of a file ID. + No PID is given when running within local containers. + + TODO - Check if possible with containers + """ + + # Arrange + BASE_URL = "https://dataverse.harvard.edu" + pid = "doi:10.7910/DVN/26093/IGA4JD" + api = DataAccessApi(BASE_URL) + + # Act + response = api.get_datafile(pid, is_pid=True) + response.raise_for_status() + content = response.content + + # Assert + expected = self._get_file_content(BASE_URL, pid) + assert content == expected, "Data retrieval failed." + + @staticmethod + def _create_dataset( + BASE_URL: str, + API_TOKEN: str, + metadata: dict, + ): + """ + Create a dataset in the Dataverse. 
+ + Args: + BASE_URL (str): The base URL of the Dataverse instance. + API_TOKEN (str): The API token for authentication. + metadata (dict): The metadata for the dataset. + + Returns: + str: The persistent identifier (PID) of the created dataset. + """ + url = f"{BASE_URL}/api/dataverses/root/datasets" + response = httpx.post( + url=url, + json=metadata, + headers={ + "X-Dataverse-key": API_TOKEN, + "Content-Type": "application/json", + }, + ) + + response.raise_for_status() + + return response.json()["data"]["persistentId"] + + @staticmethod + def _get_file_id( + BASE_URL: str, + API_TOKEN: str, + pid: str, + ): + """Retrieves a file ID for a given persistent identifier (PID) in Dataverse.""" + + response = httpx.get( + url=f"{BASE_URL}/api/datasets/:persistentId/?persistentId={pid}", + headers={ + "X-Dataverse-key": API_TOKEN, + "Content-Type": "application/json", + } + ) + + response.raise_for_status() + + return response.json()["data"]["latestVersion"]["files"][0]["dataFile"]["id"] + + @staticmethod + def _upload_datafile( + BASE_URL: str, + API_TOKEN: str, + pid: str, + ): + """Uploads a file to Dataverse""" + + url = f"{BASE_URL}/api/datasets/:persistentId/add?persistentId={pid}" + response = httpx.post( + url=url, + files={"file": open("tests/data/datafile.txt", "rb")}, + headers={ + "X-Dataverse-key": API_TOKEN, + }, + ) + + response.raise_for_status() + + @staticmethod + def _get_file_content( + BASE_URL: str, + pid: str, + ): + """Retrieves the file content for testing purposes.""" + + response = httpx.get( + url=f"{BASE_URL}/api/access/datafile/:persistentId/?persistentId={pid}", + follow_redirects=True, + ) + + response.raise_for_status() + + return response.content