From 27918896016b40354317bcef04ac460c23fff320 Mon Sep 17 00:00:00 2001 From: CorentinPeutin Date: Thu, 16 Mar 2023 16:12:07 +0100 Subject: [PATCH 1/7] Ajout copie HTTP -> FILE/S3/CEPH --- src/rok4/Storage.py | 135 +++++++++++++++++++++++------ tests/test_Storage.py | 192 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 268 insertions(+), 59 deletions(-) diff --git a/src/rok4/Storage.py b/src/rok4/Storage.py index db4ec6c..e8a8a7f 100644 --- a/src/rok4/Storage.py +++ b/src/rok4/Storage.py @@ -4,6 +4,8 @@ - S3 (path are preffixed with `s3://`) - CEPH (path are preffixed with `ceph://`) - FILE (path are preffixed with `file://`, but it is the default paths' interpretation) +- HTTP (path are preffixed with `http://`) +- HTTPS (path are preffixed with `https://`) According to functions, all storage types are not necessarily available. @@ -35,6 +37,7 @@ import os import rados import hashlib +import requests from typing import Dict, List, Tuple from enum import Enum from shutil import copyfile @@ -45,6 +48,8 @@ class StorageType(Enum): FILE = "file://" S3 = "s3://" CEPH = "ceph://" + HTTP = "http://" + HTTPS = "https://" __S3_CLIENTS = dict() __S3_DEFAULT_CLIENT = None @@ -62,7 +67,7 @@ def __get_s3_client(bucket_name: str) -> Tuple['boto3.client', str, str]: Returns: Tuple['boto3.client', str, str]: the S3 client, the cluster host and the simple bucket name - """ + """ global __S3_CLIENTS, __S3_DEFAULT_CLIENT if not __S3_CLIENTS: @@ -113,7 +118,7 @@ def __get_s3_client(bucket_name: str) -> Tuple['boto3.client', str, str]: def disconnect_s3_clients() -> None: """Clean S3 clients - """ + """ global __S3_CLIENTS, __S3_DEFAULT_CLIENT __S3_CLIENTS = dict() __S3_DEFAULT_CLIENT = None @@ -134,7 +139,7 @@ def __get_ceph_ioctx(pool: str) -> 'rados.Ioctx': Returns: rados.Ioctx: IO ceph context - """ + """ global __CEPH_CLIENT, __CEPH_IOCTXS if __CEPH_CLIENT is None: @@ -146,7 +151,7 @@ def __get_ceph_ioctx(pool: str) -> 'rados.Ioctx': ) __CEPH_CLIENT.connect() - + 
except KeyError as e: raise MissingEnvironmentError(e) except Exception as e: @@ -157,12 +162,12 @@ def __get_ceph_ioctx(pool: str) -> 'rados.Ioctx': __CEPH_IOCTXS[pool] = __CEPH_CLIENT.open_ioctx(pool) except Exception as e: raise StorageError("CEPH", e) - + return __CEPH_IOCTXS[pool] def disconnect_ceph_clients() -> None: """Clean CEPH clients - """ + """ global __CEPH_CLIENT, __CEPH_IOCTXS __CEPH_CLIENT = None __CEPH_IOCTXS = dict() @@ -173,8 +178,8 @@ def get_infos_from_path(path: str) -> Tuple[StorageType, str, str, str]: """Extract storage type, the unprefixed path, the container and the basename from path (Default: FILE storage) For a FILE storage, the tray is the directory and the basename is the file name. - - For an object storage (CEPH or S3), the tray is the bucket or the pool and the basename is the object name. + + For an object storage (CEPH or S3), the tray is the bucket or the pool and the basename is the object name. For a S3 bucket, format can be @ to use several clusters. 
Cluster name is the host (without protocol) Args: @@ -192,6 +197,10 @@ def get_infos_from_path(path: str) -> Tuple[StorageType, str, str, str]: return StorageType.CEPH, path[7:], pool_name, object_name elif path.startswith("file://"): return StorageType.FILE, path[7:], os.path.dirname(path[7:]), os.path.basename(path[7:]) + elif path.startswith("http://"): + return StorageType.HTTP, path, "NULL", "NULL" + elif path.startswith("https://"): + return StorageType.HTTPS, path, "NULL", "NULL" else: return StorageType.FILE, path, os.path.dirname(path), os.path.basename(path) @@ -206,7 +215,7 @@ def get_path_from_infos(storage_type: StorageType, *args) -> str: Returns: str: Full path - """ + """ return f"{storage_type.value}{os.path.join(*args)}" @@ -247,7 +256,7 @@ def get_data_str(path: str) -> str: return get_data_binary(path).decode('utf-8') -def get_data_binary(path: str, range: Tuple[int, int] = None) -> str: +def get_data_binary(path: str, range: Tuple[int, int] = None) -> str: """Load data into a binary string Args: @@ -262,10 +271,10 @@ def get_data_binary(path: str, range: Tuple[int, int] = None) -> str: str: Data binary content """ - storage_type, path, tray_name, base_name = get_infos_from_path(path) + storage_type, path, tray_name, base_name = get_infos_from_path(path) if storage_type == StorageType.S3: - + s3_client, host, tray_name = __get_s3_client(tray_name) try: @@ -285,7 +294,7 @@ def get_data_binary(path: str, range: Tuple[int, int] = None) -> str: raise StorageError("S3", e) elif storage_type == StorageType.CEPH: - + ioctx = __get_ceph_ioctx(tray_name) try: @@ -307,11 +316,22 @@ def get_data_binary(path: str, range: Tuple[int, int] = None) -> str: else: f.seek(range[0]) data = f.read(range[1]) - + f.close() except Exception as e: raise StorageError("FILE", e) + elif storage_type == StorageType.HTTP or storage_type == StorageType.HTTPS: + + try: + reponse = requests.get(path) + data = reponse.content + print(reponse.content) + if reponse.status_code == 
404 : + raise StorageError("HTTP", "Requested file does not exist") + except Exception as e: + raise StorageError("HTTP", e) + else: raise StorageError("UNKNOWN", "Unhandled storage type to read binary data") @@ -334,7 +354,7 @@ def put_data_str(data: str, path: str) -> None: storage_type, path, tray_name, base_name = get_infos_from_path(path) if storage_type == StorageType.S3: - + s3_client, host, tray_name = __get_s3_client(tray_name) try: @@ -347,7 +367,7 @@ def put_data_str(data: str, path: str) -> None: raise StorageError("S3", e) elif storage_type == StorageType.CEPH: - + ioctx = __get_ceph_ioctx(tray_name) try: @@ -412,6 +432,14 @@ def get_size(path: str) -> int: except Exception as e: raise StorageError("FILE", e) + elif storage_type == StorageType.HTTP or storage_type == StorageType.HTTPS: + + try: + reponse = requests.get(path) + return reponse.content.__sizeof__() + except Exception as e: + raise StorageError("HTTP", e) + else: raise StorageError("UNKNOWN", "Unhandled storage type to get size") @@ -461,6 +489,17 @@ def exists(path: str) -> bool: return os.path.exists(path) + elif storage_type == StorageType.HTTP or storage_type == StorageType.HTTPS: + + try: + reponse = requests.get(path) + if reponse.status_code == 200 : + return True + else : + return False + except Exception as e: + raise StorageError("HTTP", e) + else: raise StorageError("UNKNOWN", "Unhandled storage type to test if exists") @@ -529,7 +568,7 @@ def copy(from_path: str, to_path: str, from_md5: str = None) -> None: # Réalisation de la copie, selon les types de stockage if from_type == StorageType.FILE and to_type == StorageType.FILE : - + try: if to_tray != "": os.makedirs(to_tray, exist_ok=True) @@ -545,13 +584,13 @@ def copy(from_path: str, to_path: str, from_md5: str = None) -> None: raise StorageError(f"FILE", f"Cannot copy file {from_path} to {to_path} : {e}") elif from_type == StorageType.S3 and to_type == StorageType.FILE : - + s3_client, host, from_tray = 
__get_s3_client(from_tray) try: if to_tray != "": os.makedirs(to_tray, exist_ok=True) - + s3_client.download_file(from_tray, from_base_name, to_path) if from_md5 is not None : @@ -565,7 +604,7 @@ def copy(from_path: str, to_path: str, from_md5: str = None) -> None: elif from_type == StorageType.FILE and to_type == StorageType.S3 : s3_client, host, to_tray = __get_s3_client(to_tray) - + try: s3_client.upload_file(from_path, to_tray, to_base_name) @@ -587,7 +626,7 @@ def copy(from_path: str, to_path: str, from_md5: str = None) -> None: { 'Bucket': from_tray, 'Key': from_base_name - }, + }, to_tray, to_base_name ) else: @@ -602,7 +641,7 @@ def copy(from_path: str, to_path: str, from_md5: str = None) -> None: except Exception as e: raise StorageError(f"S3", f"Cannot copy S3 object {from_path} to {to_path} : {e}") - + elif from_type == StorageType.CEPH and to_type == StorageType.FILE : @@ -644,7 +683,7 @@ def copy(from_path: str, to_path: str, from_md5: str = None) -> None: elif from_type == StorageType.FILE and to_type == StorageType.CEPH : ioctx = __get_ceph_ioctx(to_tray) - + if from_md5 is not None: checker = hashlib.md5() @@ -706,7 +745,7 @@ def copy(from_path: str, to_path: str, from_md5: str = None) -> None: except Exception as e: raise StorageError(f"CEPH", f"Cannot copy CEPH object {from_path} to {to_path} : {e}") - + elif from_type == StorageType.CEPH and to_type == StorageType.S3 : from_ioctx = __get_ceph_ioctx(from_tray) @@ -749,6 +788,52 @@ def copy(from_path: str, to_path: str, from_md5: str = None) -> None: except Exception as e: raise StorageError(f"CEPH and S3", f"Cannot copy CEPH object {from_path} to S3 object {to_path} : {e}") + elif (from_type == StorageType.HTTP or from_type == StorageType.HTTPS) and to_type == StorageType.FILE : + + try: + reponse = requests.get(from_path, stream = True) + with open(to_path, "wb") as f: + for chunk in reponse.iter_content(chunk_size=65536) : + if chunk: + f.write(chunk) + + except Exception as e: + raise 
StorageError(f"HTTP(S) and FILE", f"Cannot copy HTTP(S) object {from_path} to FILE object {to_path} : {e}") + + elif (from_type == StorageType.HTTP or from_type == StorageType.HTTPS) and to_type == StorageType.CEPH : + + to_ioctx = __get_ceph_ioctx(to_tray) + + try: + reponse = requests.get(from_path, stream = True) + offset = 0 + for chunk in reponse.iter_content(chunk_size=65536) : + if chunk: + to_ioctx.write(to_base_name, chunk, offset) + offset += 65536 + + except Exception as e: + raise StorageError(f"HTTP(S) and CEPH", f"Cannot copy HTTP(S) object {from_path} to CEPH object {to_path} : {e}") + + elif (from_type == StorageType.HTTP or from_type == StorageType.HTTPS) and to_type == StorageType.S3 : + + to_s3_client, to_host, to_tray = __get_s3_client(to_tray) + + try: + reponse = requests.get(from_path, stream = True) + with tempfile.NamedTemporaryFile("w+b",delete=False) as f: + name_fich = f.name + for chunk in reponse.iter_content(chunk_size=65536) : + if chunk: + f.write(chunk) + + to_s3_client.upload_file(name_fich, to_tray, to_base_name) + + os.remove(name_fich) + + except Exception as e: + raise StorageError(f"HTTP(S) and S3", f"Cannot copy HTTP(S) object {from_path} to S3 object {to_path} : {e}") + else: raise StorageError(f"{from_type.name} and {to_type.name}", f"Cannot copy from {from_type.name} to {to_type.name}") @@ -795,7 +880,7 @@ def link(target_path: str, link_path: str, hard: bool = False) -> None: raise StorageError("S3", e) elif target_type == StorageType.CEPH: - + ioctx = __get_ceph_ioctx(link_tray) try: diff --git a/tests/test_Storage.py b/tests/test_Storage.py index 175568c..985a1ea 100644 --- a/tests/test_Storage.py +++ b/tests/test_Storage.py @@ -20,7 +20,7 @@ def test_hash_file_ok(mock_file): except Exception as exc: assert False, f"FILE md5 sum raises an exception: {exc}" -@mock.patch.dict(os.environ, {}, clear=True) +@mock.patch.dict(os.environ, {}, clear=True) def test_get_infos_from_path(): assert (StorageType.S3, "toto/titi", 
"toto", "titi") == get_infos_from_path("s3://toto/titi") assert (StorageType.FILE, "/toto/titi/tutu.json", "/toto/titi", "tutu.json") == get_infos_from_path("file:///toto/titi/tutu.json") @@ -28,7 +28,7 @@ def test_get_infos_from_path(): assert (StorageType.FILE, "wrong://toto/titi", "wrong://toto", "titi") == get_infos_from_path("wrong://toto/titi") -@mock.patch.dict(os.environ, {}, clear=True) +@mock.patch.dict(os.environ, {}, clear=True) def test_get_path_from_infos(): assert get_path_from_infos(StorageType.S3, "toto", "toto/titi") == "s3://toto/toto/titi" assert get_path_from_infos(StorageType.FILE, "/toto/titi", "tutu.json") == "file:///toto/titi/tutu.json" @@ -66,7 +66,7 @@ def test_s3_invalid_endpoint(mocked_s3_client): def test_file_read_error(mock_file): with pytest.raises(StorageError): data = get_data_str("file:///path/to/file.ext") - + mock_file.assert_called_with("/path/to/file.ext", "rb") @@ -80,7 +80,7 @@ def test_file_read_ok(mock_file): except Exception as exc: assert False, f"FILE read raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) @mock.patch('rok4.Storage.boto3.client') def test_s3_read_nok(mocked_s3_client): disconnect_s3_clients() @@ -90,7 +90,7 @@ def test_s3_read_nok(mocked_s3_client): with pytest.raises(StorageError): data = get_data_str("s3://bucket/path/to/object") -@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) @mock.patch('rok4.Storage.boto3.client') def test_s3_read_ok(mocked_s3_client): @@ -110,7 +110,7 @@ def test_s3_read_ok(mocked_s3_client): assert False, f"S3 read raises an 
exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) @mock.patch('rok4.Storage.rados.Rados') def test_ceph_read_ok(mocked_rados_client): @@ -128,10 +128,36 @@ def test_ceph_read_ok(mocked_rados_client): except Exception as exc: assert False, f"CEPH read raises an exception: {exc}" +@mock.patch.dict(os.environ, {}, clear=True) +@mock.patch("requests.get", side_effect={"status_code":404}) +def test_http_read_error(mock_http): + with pytest.raises(StorageError): + requests_instance = MagicMock() + requests_instance.content = "NULL" + requests_instance.status_code = 404 + mock_http.return_value = requests_instance + data = get_data_str("http:///path/to/file.ext") + + mock_http.assert_called_with("http:///path/to/file.ext") + +@mock.patch.dict(os.environ, {}, clear=True) +@mock.patch("requests.get") +def test_http_read_ok(mock_http): + try : + requests_instance = MagicMock() + requests_instance.content = b'data' + mock_http.return_value = requests_instance + + data = get_data_str("http:///path/to/file.ext") + mock_http.assert_called_with("http:///path/to/file.ext") + assert data == 'data' + except Exception as exc: + assert False, f"HTTP read raises an exception: {exc}" + ############ put_data_str -@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) @mock.patch('rok4.Storage.boto3.client') def test_s3_write_nok(mocked_s3_client): @@ -144,7 +170,7 @@ def test_s3_write_nok(mocked_s3_client): put_data_str("data", "s3://bucket/path/to/object") -@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", 
"ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) @mock.patch('rok4.Storage.boto3.client') def test_s3_write_ok(mocked_s3_client): @@ -158,7 +184,7 @@ def test_s3_write_ok(mocked_s3_client): assert False, f"S3 write raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) @mock.patch('rok4.Storage.rados.Rados') def test_ceph_write_ok(mocked_rados_client): @@ -190,7 +216,7 @@ def test_copy_file_file_ok(mock_hash_file, mock_copyfile, mock_makedirs): assert False, f"FILE -> FILE copy raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) @mock.patch('rok4.Storage.boto3.client') @mock.patch('os.makedirs', return_value=None) @mock.patch('rok4.Storage.hash_file', return_value="toto") @@ -210,7 +236,7 @@ def test_copy_s3_file_ok(mock_hash_file, mock_makedirs, mocked_s3_client): assert False, f"S3 -> FILE copy raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) @mock.patch('rok4.Storage.boto3.client') @mock.patch('os.makedirs', return_value=None) @mock.patch('rok4.Storage.hash_file', return_value="toto") @@ -226,7 +252,7 @@ def test_copy_s3_file_nok(mock_hash_file, mock_makedirs, mocked_s3_client): mock_makedirs.assert_called_once_with("/path/to", exist_ok=True) 
-@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) @mock.patch('rok4.Storage.boto3.client') def test_copy_file_s3_ok(mocked_s3_client): @@ -242,7 +268,7 @@ def test_copy_file_s3_ok(mocked_s3_client): assert False, f"FILE -> S3 copy raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) @mock.patch('rok4.Storage.boto3.client') def test_copy_s3_s3_ok(mocked_s3_client): @@ -258,7 +284,7 @@ def test_copy_s3_s3_ok(mocked_s3_client): assert False, f"S3 -> S3 copy raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) @mock.patch('rok4.Storage.boto3.client') def test_copy_s3_s3_intercluster_ok(mocked_s3_client): @@ -274,7 +300,7 @@ def test_copy_s3_s3_intercluster_ok(mocked_s3_client): assert False, f"S3 -> S3 inter cluster copy raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) @mock.patch('rok4.Storage.boto3.client') def test_copy_s3_s3_intercluster_nok(mocked_s3_client): @@ -287,7 +313,7 @@ def test_copy_s3_s3_intercluster_nok(mocked_s3_client): with pytest.raises(StorageError): copy("s3://bucket@a/source.ext", "s3://bucket@c/destination.ext", "toto") 
-@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) @mock.patch('rok4.Storage.rados.Rados') @mock.patch('os.makedirs', return_value=None) @patch("builtins.open", new_callable=mock_open) @@ -306,7 +332,7 @@ def test_copy_ceph_file_ok(mock_file, mock_makedirs, mocked_rados_client): except Exception as exc: assert False, f"CEPH -> FILE copy raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) @mock.patch('rok4.Storage.rados.Rados') @patch("builtins.open", new_callable=mock_open, read_data=b"data") def test_copy_file_ceph_ok(mock_file, mocked_rados_client): @@ -323,7 +349,7 @@ def test_copy_file_ceph_ok(mock_file, mocked_rados_client): except Exception as exc: assert False, f"FILE -> CEPH copy raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) @mock.patch('rok4.Storage.rados.Rados') @patch("builtins.open", new_callable=mock_open, read_data=b"data") def test_copy_ceph_ceph_ok(mock_file, mocked_rados_client): @@ -342,7 +368,7 @@ def test_copy_ceph_ceph_ok(mock_file, mocked_rados_client): assert False, f"CEPH -> CEPH copy raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c", "ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": 
"a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c", "ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) @mock.patch('rok4.Storage.rados.Rados') @mock.patch('rok4.Storage.boto3.client') @patch("builtins.open", new_callable=mock_open, read_data=b"data") @@ -367,6 +393,69 @@ def test_copy_ceph_s3_ok(mock_file, mocked_s3_client, mocked_rados_client): except Exception as exc: assert False, f"CEPH -> S3 copy raises an exception: {exc}" +@mock.patch.dict(os.environ, {}, clear=True) +@mock.patch('requests.get') +@patch('builtins.open', new_callable=mock_open) +def test_copy_http_file_ok(mock_open, mock_requests): + try: + + http_instance = MagicMock() + http_instance.iter_content.return_value = ["data","data2"] + mock_requests.return_value = http_instance + + copy("http:///path/to/source.ext", "file:///path/to/destination.ext") + mock_requests.assert_called_once_with("http:///path/to/source.ext", stream=True) + mock_open.assert_called_once_with("/path/to/destination.ext", "wb") + except Exception as exc: + assert False, f"HTTP -> FILE copy raises an exception: {exc}" + +@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) +@mock.patch('rok4.Storage.rados.Rados') +@mock.patch('requests.get') +def test_copy_http_ceph_ok(mock_requests, mocked_rados_client): + try: + + http_instance = MagicMock() + http_instance.iter_content.return_value = ["data","data2"] + mock_requests.return_value = http_instance + + + disconnect_ceph_clients() + ioctx_instance = MagicMock() + ioctx_instance.write.return_value = None + ceph_instance = MagicMock() + ceph_instance.open_ioctx.return_value = ioctx_instance + mocked_rados_client.return_value = ceph_instance + + copy("http:///path/to/source.ext", "ceph://pool1/source.ext") + mock_requests.assert_called_once_with("http:///path/to/source.ext", stream=True) + except Exception as exc: + assert False, f"HTTP -> CEPH 
copy raises an exception: {exc}" + +@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch('rok4.Storage.boto3.client') +@mock.patch('requests.get') +@patch('tempfile.NamedTemporaryFile', new_callable=mock_open) +@mock.patch('os.remove') +def test_copy_http_s3_ok(mock_remove, mock_tempfile, mock_requests, mocked_s3_client): + try: + + http_instance = MagicMock() + http_instance.iter_content.return_value = ["data","data2"] + mock_requests.return_value = http_instance + + disconnect_s3_clients() + s3_instance = MagicMock() + s3_instance.upload_file.return_value = None + s3_instance.head_object.return_value = {"ETag": "8d777f385d3dfec8815d20f7496026dc"} + mocked_s3_client.return_value = s3_instance + + copy("http:///path/to/source.ext", "s3://bucket/destination.ext") + mock_requests.assert_called_once_with("http:///path/to/source.ext", stream=True) + mock_tempfile.assert_called_once_with("w+b",delete=False) + except Exception as exc: + assert False, f"HTTP -> CEPH copy raises an exception: {exc}" + ############ link @@ -379,7 +468,7 @@ def test_link_hard_nok(): with pytest.raises(StorageError): link("ceph://pool1/source.ext", "ceph://pool2/destination.ext", True) -@mock.patch.dict(os.environ, {}, clear=True) +@mock.patch.dict(os.environ, {}, clear=True) @mock.patch('os.symlink', return_value=None) def test_link_file_ok(mock_link): try: @@ -389,7 +478,7 @@ def test_link_file_ok(mock_link): assert False, f"FILE link raises an exception: {exc}" -@mock.patch.dict(os.environ, {}, clear=True) +@mock.patch.dict(os.environ, {}, clear=True) @mock.patch('os.link', return_value=None) def test_hlink_file_ok(mock_link): try: @@ -398,7 +487,7 @@ def test_hlink_file_ok(mock_link): except Exception as exc: assert False, f"FILE hard link raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) 
+@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) @mock.patch('rok4.Storage.rados.Rados') def test_link_ceph_ok(mocked_rados_client): @@ -415,7 +504,7 @@ def test_link_ceph_ok(mocked_rados_client): assert False, f"CEPH link raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) @mock.patch('rok4.Storage.boto3.client') def test_link_s3_ok(mocked_s3_client): @@ -430,7 +519,7 @@ def test_link_s3_ok(mocked_s3_client): assert False, f"S3 link raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) @mock.patch('rok4.Storage.boto3.client') def test_link_s3_nok(mocked_s3_client): @@ -444,7 +533,7 @@ def test_link_s3_nok(mocked_s3_client): ############ get_size -@mock.patch.dict(os.environ, {}, clear=True) +@mock.patch.dict(os.environ, {}, clear=True) @mock.patch('os.stat') def test_size_file_ok(mock_stat): mock_stat.return_value.st_size = 12 @@ -454,7 +543,7 @@ def test_size_file_ok(mock_stat): except Exception as exc: assert False, f"FILE size raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) @mock.patch('rok4.Storage.rados.Rados') def test_size_ceph_ok(mocked_rados_client): @@ -472,7 +561,7 @@ def test_size_ceph_ok(mocked_rados_client): assert False, f"CEPH size raises an exception: {exc}" 
-@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) @mock.patch('rok4.Storage.boto3.client') def test_size_s3_ok(mocked_s3_client): @@ -487,10 +576,24 @@ def test_size_s3_ok(mocked_s3_client): except Exception as exc: assert False, f"S3 size raises an exception: {exc}" +@mock.patch.dict(os.environ, {}, clear=True) +@mock.patch('requests.get') +def test_size_http_ok(mock_requests): + + http_instance = MagicMock() + http_instance.content.__sizeof__.return_value = 12 + mock_requests.return_value = http_instance + + try: + size = get_size("http:///path/to/file.ext") + assert size == 12 + except Exception as exc: + assert False, f"HTTP size raises an exception: {exc}" + ############ exists -@mock.patch.dict(os.environ, {}, clear=True) +@mock.patch.dict(os.environ, {}, clear=True) @mock.patch('os.path.exists', return_value=True) def test_exists_file_ok(mock_exists): try: @@ -504,7 +607,7 @@ def test_exists_file_ok(mock_exists): except Exception as exc: assert False, f"FILE not exists raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) @mock.patch('rok4.Storage.rados.Rados') def test_exists_ceph_ok(mocked_rados_client): @@ -527,7 +630,7 @@ def test_exists_ceph_ok(mocked_rados_client): assert False, f"CEPH not exists raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) 
@mock.patch('rok4.Storage.boto3.client') def test_exists_s3_ok(mocked_s3_client): @@ -547,10 +650,31 @@ def test_exists_s3_ok(mocked_s3_client): except Exception as exc: assert False, f"CEPH not exists raises an exception: {exc}" +@mock.patch.dict(os.environ, {}, clear=True) +@mock.patch('requests.get') +def test_exists_http_ok(mock_requests): + + http_instance = MagicMock() + http_instance.status_code = 200 + mock_requests.return_value = http_instance + + try: + assert exists("http:///path/to/file.ext") + except Exception as exc: + assert False, f"HTTP exists raises an exception: {exc}" + + http_instance.status_code = 404 + mock_requests.return_value = http_instance + + try: + assert not exists("http:///path/to/file.ext") + except Exception as exc: + assert False, f"HTTP exists raises an exception: {exc}" + ############ remove -@mock.patch.dict(os.environ, {}, clear=True) +@mock.patch.dict(os.environ, {}, clear=True) @mock.patch('os.remove') def test_remove_file_ok(mock_remove): mock_remove.return_value = None @@ -565,7 +689,7 @@ def test_remove_file_ok(mock_remove): except Exception as exc: assert False, f"FILE deletion (not found) raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_CEPH_CONFFILE": "a", "ROK4_CEPH_CLUSTERNAME": "b", "ROK4_CEPH_USERNAME": "c"}, clear=True) @mock.patch('rok4.Storage.rados.Rados') def test_remove_ceph_ok(mocked_rados_client): @@ -588,7 +712,7 @@ def test_remove_ceph_ok(mocked_rados_client): assert False, f"CEPH deletion (not found) raises an exception: {exc}" -@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) +@mock.patch.dict(os.environ, {"ROK4_S3_URL": "https://a,https://b", "ROK4_S3_SECRETKEY": "a,b", "ROK4_S3_KEY": "a,b"}, clear=True) @mock.patch('rok4.Storage.boto3.client') def 
test_remove_s3_ok(mocked_s3_client): @@ -600,4 +724,4 @@ def test_remove_s3_ok(mocked_s3_client): try: remove("s3://bucket/object.ext") except Exception as exc: - assert False, f"S3 deletion raises an exception: {exc}" \ No newline at end of file + assert False, f"S3 deletion raises an exception: {exc}" From 298f0c58ee400538006d3a2f4e842e35ae65eb79 Mon Sep 17 00:00:00 2001 From: CorentinPeutin Date: Thu, 16 Mar 2023 16:30:23 +0100 Subject: [PATCH 2/7] =?UTF-8?q?Mise=20=C3=A0=20jour=20du=20CHANGELOG.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a62581..256187b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,29 +1,14 @@ ## Summary -Ajout de fonctionnalités de lecture de donnée d'une pyramide et suivi des recommandations PyPA pour la gestion du projet. +Ajout du type de stockage HTTP. ## Changelog ### [Added] -* TileMatrix : - * Fonction de calcul des indices de tuile et de pixel dans la tuile à partir d'un point dans le système de coordonnées du TMS -* Pyramid : - * Fonction de calcul des indices de tuile et de pixel dans la tuile à partir d'un point dans le système de coordonnées du TMS et éventuellement un niveau - * Fonctions de lecture d'une tuile : au format binaire source ou au format tableau à 3 dimensions pour les tuiles raster * Storage : - * Fonction de lecture binaire, complète ou partielle, d'un fichier ou objet S3 ou CEPH -* Exceptions : NotImplementedError permet de préciser qu'une fonctionnalité n'a pas été implémentée pour tous les cas. Ici, on ne gère pas la décompression des données raster pour les compressions packbit et LZW - -* Ajout de la publication PyPI dans la CI GitHub - -### [Changed] - -* Storage : - * La lecture sous forme de chaîne s'appuie sur la lecture complète binaire. Aucun changement à l'usage. 
-* TileMatrixSet : quelque soit le système de coordonnées, on ne gère que un ordre des axes X,Y ou Lon,Lat. Cependant, les fonctions de calcul de ou à partir de bbox respectent l'ordre du système dans ces dernières. - -* Passage de la configuration du projet dans le fichier `pyproject.toml` + * Ajout de la copie de HTTP vers FILE/S3/CEPH + * Ajout des fonctions de lecture, de test d'existence et de calcul de taille d'un fichier HTTP