diff --git a/deeplake/api/dataset.py b/deeplake/api/dataset.py
index 45e96711af..5e42d70feb 100644
--- a/deeplake/api/dataset.py
+++ b/deeplake/api/dataset.py
@@ -72,6 +72,7 @@
     LockedException,
     BadRequestException,
     RenameError,
+    InvalidPandasDataframeError,
 )
 from deeplake.util.storage import (
     get_storage_and_cache_chain,
@@ -82,6 +83,17 @@
 from deeplake.util.cache_chain import generate_chain
 from deeplake.core.storage.deeplake_memory_object import DeepLakeMemoryObject

+allow_delete_error_message = "Dataset overwrite failed. The dataset is marked as allow_delete=false. To allow overwrite, you must first set `allow_delete = True` on the dataset."
+see_traceback_error_message = (
+    "Dataset overwrite failed. See traceback for more information."
+)
+
+dataset_corrupted_error_message = (
+    "The source dataset is corrupted. "
+    "You can try to fix this by loading the dataset with `reset=True` "
+    "which will attempt to reset uncommitted HEAD changes and load the previous version."
+)
+

 def _check_indra_and_read_only_flags(indra: bool, read_only: Optional[bool]):
     if indra == False:
@@ -258,7 +270,7 @@ def init(
         db_engine = parse_runtime_parameters(path, runtime)["tensor_db"]

         try:
-            storage, cache_chain = get_storage_and_cache_chain(
+            _, cache_chain = get_storage_and_cache_chain(
                 path=path,
                 db_engine=db_engine,
                 read_only=read_only,
@@ -279,16 +291,12 @@
         if ds_exists:
             if overwrite:
                 if not dataset._allow_delete(cache_chain):
-                    raise DatasetHandlerError(
-                        "Dataset overwrite failed. The dataset is marked as allow_delete=false. To allow overwrite, you must first run `allow_delete = True` on the dataset."
-                    )
+                    raise DatasetHandlerError(allow_delete_error_message)

                 try:
                     cache_chain.clear()
                 except Exception as e:
-                    raise DatasetHandlerError(
-                        "Dataset overwrite failed. See traceback for more information."
-                    ) from e
+                    raise DatasetHandlerError(see_traceback_error_message) from e
                 create = True
             else:
                 create = False
@@ -403,7 +411,7 @@ def exists(
            _fetch_creds_from_key(creds, org_id, token)

         try:
-            storage, cache_chain = get_storage_and_cache_chain(
+            storage, _ = get_storage_and_cache_chain(
                 path=path,
                 read_only=True,
                 creds=creds,
@@ -523,16 +531,12 @@ def empty(

         if overwrite and dataset_exists(cache_chain):
             if not dataset._allow_delete(cache_chain):
-                raise DatasetHandlerError(
-                    "Dataset overwrite failed. The dataset is marked as allow_delete=false. To allow overwrite, you must first run `allow_delete = True` on the dataset."
-                )
+                raise DatasetHandlerError(allow_delete_error_message)

             try:
                 cache_chain.clear()
             except Exception as e:
-                raise DatasetHandlerError(
-                    "Dataset overwrite failed. See traceback for more information."
-                ) from e
+                raise DatasetHandlerError(see_traceback_error_message) from e
         elif dataset_exists(cache_chain):
             raise DatasetHandlerError(
                 f"A dataset already exists at the given path ({path}). 
If you want to create" @@ -1143,11 +1147,11 @@ def _like( # (No reporting) if dest_path == src_path: # load tensor data to memory before deleting # in case of in-place deeplake.like - meta = source_tensor.meta - info = source_tensor.info - sample_shape_tensor = source_tensor._sample_shape_tensor - sample_id_tensor = source_tensor._sample_id_tensor - sample_info_tensor = source_tensor._sample_info_tensor + _ = source_tensor.meta + _ = source_tensor.info + _ = source_tensor._sample_shape_tensor + _ = source_tensor._sample_id_tensor + _ = source_tensor._sample_info_tensor destination_ds.delete_tensor(tensor_name) destination_ds.create_tensor_like(tensor_name, source_tensor, unlink=tensor_name in unlink) # type: ignore @@ -1218,9 +1222,7 @@ def copy( ) except DatasetCorruptError as e: raise DatasetCorruptError( - "The source dataset is corrupted.", - "You can try to fix this by loading the dataset with `reset=True` " - "which will attempt to reset uncommitted HEAD changes and load the previous version.", + dataset_corrupted_error_message, e.__cause__, ) else: @@ -1323,9 +1325,7 @@ def deepcopy( ) except DatasetCorruptError as e: raise DatasetCorruptError( - "The source dataset is corrupted.", - "You can try to fix this by loading the dataset with `reset=True` " - "which will attempt to reset uncommitted HEAD changes and load the previous version.", + dataset_corrupted_error_message, e.__cause__, ) else: @@ -1364,16 +1364,12 @@ def deepcopy( if dataset_exists(cache_chain): if overwrite: if not dataset._allow_delete(cache_chain): - raise DatasetHandlerError( - "Dataset overwrite failed. The dataset is marked as allow_delete=false. To allow overwrite, you must first run `allow_delete = True` on the dataset." - ) + raise DatasetHandlerError(allow_delete_error_message) try: cache_chain.clear() except Exception as e: - raise DatasetHandlerError( - "Dataset overwrite failed. See traceback for more information." - ) from e + raise DatasetHandlerError(see_traceback_error_message) from e else: raise DatasetHandlerError( f"A dataset already exists at the given path ({dest}). If you want to copy to a new dataset, either specify another path or use overwrite=True." @@ -2125,7 +2121,7 @@ def ingest_dataframe( Dataset: New dataset created from the dataframe. Raises: - Exception: If ``src`` is not a valid pandas dataframe object. + InvalidPandasDataframeError: If ``src`` is not a valid pandas dataframe object. 
""" import pandas as pd from deeplake.auto.structured.dataframe import DataFrame @@ -2138,7 +2134,7 @@ def ingest_dataframe( ) if not isinstance(src, pd.DataFrame): - raise Exception("Source provided is not a valid pandas dataframe object") + raise InvalidPandasDataframeError() structured = DataFrame(src, column_params, src_creds, creds_key) @@ -2244,9 +2240,7 @@ def export_yolo( ) except DatasetCorruptError as e: raise DatasetCorruptError( - "The source dataset is corrupted.", - "You can try to fix this by loading the dataset with `reset=True` " - "which will attempt to reset uncommitted HEAD changes and load the previous version.", + dataset_corrupted_error_message, e.__cause__, ) else: diff --git a/deeplake/auto/structured/dataframe.py b/deeplake/auto/structured/dataframe.py index 1b892d066d..43330b865d 100644 --- a/deeplake/auto/structured/dataframe.py +++ b/deeplake/auto/structured/dataframe.py @@ -3,7 +3,7 @@ from deeplake import Dataset from deeplake import read, link from deeplake.htype import HTYPE_SUPPORTED_COMPRESSIONS -from deeplake.util.exceptions import IngestionError +from deeplake.util.exceptions import IngestionError, InvalidPandasDataframeError from deeplake.util.dataset import sanitize_tensor_name from collections import defaultdict @@ -26,14 +26,14 @@ def __init__(self, source, column_params=None, creds=None, creds_key=None): Raises: - Exception: If source is not a pandas dataframe object. + InvalidPandasDataframeError: If source is not a pandas dataframe object. """ import pandas as pd # type: ignore super().__init__(source) if not isinstance(self.source, pd.DataFrame): - raise Exception("Source is not a pandas dataframe object.") + raise InvalidPandasDataframeError() self.creds = creds self.creds_key = creds_key diff --git a/deeplake/auto/tests/test_ingestion.py b/deeplake/auto/tests/test_ingestion.py index 1bd1946b5f..62253ef6d7 100644 --- a/deeplake/auto/tests/test_ingestion.py +++ b/deeplake/auto/tests/test_ingestion.py @@ -6,6 +6,7 @@ SamePathException, DatasetHandlerError, IngestionError, + InvalidPandasDataframeError, ) import numpy as np import pytest diff --git a/deeplake/client/utils.py b/deeplake/client/utils.py index e78afb71db..54a63938d7 100644 --- a/deeplake/client/utils.py +++ b/deeplake/client/utils.py @@ -29,18 +29,6 @@ BEST_RECALL = "best_recall@10" -def remove_username_from_config(): - try: - config = {} - with open(REPORTING_CONFIG_FILE_PATH, "r") as f: - config = json.load(f) - config["username"] = "public" - with open(REPORTING_CONFIG_FILE_PATH, "w") as f: - json.dump(config, f) - except (FileNotFoundError, KeyError): - return - - def check_response_status(response: requests.Response): """Check response status and throw corresponding exception on failure.""" code = response.status_code diff --git a/deeplake/core/chunk/chunk_compressed_chunk.py b/deeplake/core/chunk/chunk_compressed_chunk.py index 8853c3b3c8..451f1dfb62 100644 --- a/deeplake/core/chunk/chunk_compressed_chunk.py +++ b/deeplake/core/chunk/chunk_compressed_chunk.py @@ -160,13 +160,12 @@ def extend_if_has_space_byte_compression_numpy( cast = False sample_nbytes = sample.nbytes else: - if sample.size: - if not np.can_cast(sample_dtype, chunk_dtype): - raise TensorDtypeMismatchError( - chunk_dtype, - sample_dtype, - self.htype, - ) + if sample.size and not np.can_cast(sample_dtype, chunk_dtype): + raise TensorDtypeMismatchError( + chunk_dtype, + sample_dtype, + self.htype, + ) cast = True sample_nbytes = np.dtype(chunk_dtype).itemsize * sample.size min_chunk_size = self.min_chunk_size 
diff --git a/deeplake/core/chunk/uncompressed_chunk.py b/deeplake/core/chunk/uncompressed_chunk.py index a30abab715..feac604558 100644 --- a/deeplake/core/chunk/uncompressed_chunk.py +++ b/deeplake/core/chunk/uncompressed_chunk.py @@ -48,9 +48,8 @@ def _extend_if_has_space_text( num_data_bytes = self.num_data_bytes space_left = min_chunk_size - num_data_bytes idx = np.searchsorted(csum, space_left) - if not idx and csum[0] > space_left: - if self._data_bytes: - return 0 + if not idx and csum[0] > space_left and self._data_bytes: + return 0 num_samples = int(min(len(incoming_samples), idx + 1)) # type: ignore bts = list( map(self._text_sample_to_byte_string, incoming_samples[:num_samples]) @@ -131,13 +130,12 @@ def _extend_if_has_space_numpy( chunk_dtype = self.dtype samples_dtype = incoming_samples.dtype if samples_dtype != chunk_dtype: - if size: - if not np.can_cast(samples_dtype, chunk_dtype): - raise TensorDtypeMismatchError( - chunk_dtype, - samples_dtype, - self.htype, - ) + if size and not np.can_cast(samples_dtype, chunk_dtype): + raise TensorDtypeMismatchError( + chunk_dtype, + samples_dtype, + self.htype, + ) samples = samples.astype(chunk_dtype) self._data_bytes += samples.tobytes() # type: ignore self.register_in_meta_and_headers( @@ -163,7 +161,7 @@ def _extend_if_has_space_list( if shape is not None and not self.tensor_meta.is_link: self.num_dims = self.num_dims or len(shape) check_sample_shape(shape, self.num_dims) - except Exception as e: + except Exception: if ignore_errors: skipped.append(i) continue diff --git a/deeplake/core/chunk_engine.py b/deeplake/core/chunk_engine.py index 963b8f8b56..9896723781 100644 --- a/deeplake/core/chunk_engine.py +++ b/deeplake/core/chunk_engine.py @@ -838,26 +838,27 @@ def _samples_to_chunks( incoming_num_samples = len(samples) enc_ids: List[Optional[str]] = [] enc_count = [0] - if extending: - if self.tensor_meta.htype == "text" and ( - self.chunk_class != SampleCompressedChunk - ): - lengths = np.zeros(len(samples), dtype=np.uint32) - for i, s in enumerate(samples): - try: - s = s.numpy() - except AttributeError: - pass + if ( + extending + and self.tensor_meta.htype == "text" + and (self.chunk_class != SampleCompressedChunk) + ): + lengths = np.zeros(len(samples), dtype=np.uint32) + for i, s in enumerate(samples): + try: + s = s.numpy() + except AttributeError: + pass + try: + if s.dtype.name[:3] == "str": + lengths[i] = len(str(s.reshape(()))) + except AttributeError: try: - if s.dtype.name[:3] == "str": - lengths[i] = len(str(s.reshape(()))) - except AttributeError: - try: - lengths[i] = s.__len__() - except AttributeError: # None - lengths[i] = 0 - except TypeError: # Numpy scalar str - lengths[i] = str(s).__len__() + lengths[i] = s.__len__() + except AttributeError: # None + lengths[i] = 0 + except TypeError: # Numpy scalar str + lengths[i] = str(s).__len__() extra_args = {"lengths": lengths} current_chunk = start_chunk updated_chunks: List[Optional[str]] = [] @@ -1039,12 +1040,11 @@ def _handle_tiled_sample( lengths, ): sample = samples[0] - if sample.is_first_write: - if register: - if start_chunk_row is not None: - enc.register_samples(1) - else: - enc_count[-1] += 1 + if sample.is_first_write and register: + if start_chunk_row is not None: + enc.register_samples(1) + else: + enc_count[-1] += 1 if sample.is_last_write: tiles[ incoming_num_samples - len(samples) + bool(register) * orig_meta_length @@ -1775,7 +1775,7 @@ def _update_with_operator( samples: Union[np.ndarray, Sequence[InputSample], InputSample], operator: str, ): - 
"""Update data at `index` with the output of elem-wise operatorion with samples""" + """Update data at `index` with the output of elem-wise operation with samples""" try: if isinstance(samples, deeplake.core.tensor.Tensor): samples = samples.numpy() @@ -1891,7 +1891,7 @@ def _get_full_chunk(self, index) -> bool: """ threshold = 10 - if type(index.values[0].value) == slice: + if isinstance(index.values[0].value, slice): start = index.values[0].value.start or 0 stop = index.values[0].value.stop or self.num_samples step = index.values[0].value.step or 1 @@ -2218,11 +2218,10 @@ def load_chunks( if exception: raise exception chunk, chunk_info = future.result() - if chunk: - if _get_nbytes(chunk) <= self.cache.cache_size: - self.cache._insert_in_cache( - self.get_chunk_key_for_id(chunk_info[0]), chunk - ) + if chunk and _get_nbytes(chunk) <= self.cache.cache_size: + self.cache._insert_in_cache( + self.get_chunk_key_for_id(chunk_info[0]), chunk + ) yield chunk_info else: with ThreadPoolExecutor() as executor: @@ -2232,11 +2231,10 @@ def load_chunks( repeat(storages), ): chunk, chunk_info = result - if chunk: - if _get_nbytes(chunk) <= self.cache.cache_size: - self.cache._insert_in_cache( - self.get_chunk_key_for_id(chunk_info[0]), chunk - ) + if chunk and _get_nbytes(chunk) <= self.cache.cache_size: + self.cache._insert_in_cache( + self.get_chunk_key_for_id(chunk_info[0]), chunk + ) yield chunk_info def _get_samples( diff --git a/deeplake/core/tensor.py b/deeplake/core/tensor.py index 90ade0cde7..962661ff1e 100644 --- a/deeplake/core/tensor.py +++ b/deeplake/core/tensor.py @@ -1546,11 +1546,11 @@ def deserialize_inverted_index(self, serialized_data): stream = BytesIO(serialized_data) # Read number of partitions - metadataSize = int.from_bytes( + metadata_size = int.from_bytes( stream.read(8), "little" ) # Assuming size_t is 8 bytes - metadata_bytes = stream.read(metadataSize) + metadata_bytes = stream.read(metadata_size) metadata = json.loads(metadata_bytes.decode("utf-8")) temp_paths_size = int.from_bytes(stream.read(8), "little") @@ -1847,7 +1847,6 @@ def create_vdb_index( metadata_json = json.dumps(metadata) metadata_bytes = metadata_json.encode("utf-8") self.storage[inverted_meta_key] = metadata_bytes - temp_serialized_paths_count = len(temp_serialized_paths) temp_serialized_paths = [str(path) for path in temp_serialized_paths] for i, path in enumerate(temp_serialized_paths): file_name = pathlib.Path(path).name @@ -1985,7 +1984,7 @@ def _verify_and_delete_vdb_indexes(self): def load_vdb_index(self, id: str): if not self.meta.contains_vdb_index(id): raise ValueError(f"Tensor meta has no vdb index with name '{id}'.") - if not self.dataset.libdeeplake_dataset is None: + if self.dataset.libdeeplake_dataset is not None: ds = self.dataset.libdeeplake_dataset else: from deeplake.enterprise.convert_to_libdeeplake import ( @@ -2003,7 +2002,7 @@ def load_vdb_index(self, id: str): raise ValueError(f"An error occurred while loading the VDB index {id}: {e}") def unload_vdb_index_cache(self): - if not self.dataset.libdeeplake_dataset is None: + if self.dataset.libdeeplake_dataset is not None: ds = self.dataset.libdeeplake_dataset else: from deeplake.enterprise.convert_to_libdeeplake import ( @@ -2028,7 +2027,7 @@ def get_vdb_indexes(self) -> List[Dict[str, str]]: def fetch_vdb_indexes(self) -> List[Dict[str, str]]: vdb_indexes = [] if self.meta.htype == "embedding" or self.meta.htype == "text": - if (not self.meta.vdb_indexes is None) and len(self.meta.vdb_indexes) > 0: + if (self.meta.vdb_indexes is not 
None) and len(self.meta.vdb_indexes) > 0: vdb_indexes.extend(self.meta.vdb_indexes) return vdb_indexes diff --git a/deeplake/core/tests/test_indra_dataset.py b/deeplake/core/tests/test_indra_dataset.py index 4b9c2459ed..9d578c49cb 100644 --- a/deeplake/core/tests/test_indra_dataset.py +++ b/deeplake/core/tests/test_indra_dataset.py @@ -17,7 +17,7 @@ def test_indexing(local_auth_ds_generator): deeplake_ds.create_tensor( "label", htype="generic", dtype=np.int32, **disabale_hidden_tensors_config ) - for i in range(1000): + for _ in range(1000): deeplake_ds.label.append(int(100 * random.uniform(0.0, 1.0))) indra_ds = dataset_to_libdeeplake(deeplake_ds) @@ -64,7 +64,7 @@ def test_save_view(local_auth_ds_generator): deeplake_ds.create_tensor( "label", htype="generic", dtype=np.int32, **disabale_hidden_tensors_config ) - for i in range(1000): + for _ in range(1000): deeplake_ds.label.append(int(100 * random.uniform(0.0, 1.0))) deeplake_ds.commit("First") @@ -104,7 +104,7 @@ def test_load_view(local_auth_ds_generator): deeplake_indra_ds = IndraDatasetView(indra_ds=indra_ds) with pytest.raises(Exception): - dataloader = deeplake_indra_ds.pytorch() + deeplake_indra_ds.pytorch() query_str = "select * group by label" view = deeplake_ds.query(query_str) @@ -233,7 +233,7 @@ def test_accessing_data(local_auth_ds_generator): deeplake_ds.create_tensor( "label", htype="generic", dtype=np.int32, **disabale_hidden_tensors_config ) - for i in range(1000): + for _ in range(1000): deeplake_ds.label.append(int(100 * random.uniform(0.0, 1.0))) indra_ds = dataset_to_libdeeplake(deeplake_ds) diff --git a/deeplake/integrations/mmseg/mmseg_.py b/deeplake/integrations/mmseg/mmseg_.py index ef1f0e2cb2..cf62af0fb5 100644 --- a/deeplake/integrations/mmseg/mmseg_.py +++ b/deeplake/integrations/mmseg/mmseg_.py @@ -164,6 +164,7 @@ from typing import Callable, Optional, List, Dict from prettytable import PrettyTable # type: ignore from PIL import Image # type: ignore +from deeplake.util.exceptions import ValidationDatasetMissingError from mmseg.core import DistEvalHook, EvalHook # type: ignore from mmseg.core import build_optimizer @@ -611,7 +612,6 @@ def _train_segmentor( eval_cfg = cfg.get("evaluation", {}) dl_impl = cfg.get("deeplake_dataloader_type", "auto").lower() - # TODO: check whether dataset is actually supported by enterprise dataloader if c++ if dl_impl == "auto": dl_impl = "c++" if indra_available() else "python" elif dl_impl == "cpp": @@ -733,9 +733,8 @@ def _train_segmentor( cfg.get("momentum_config", None), ) - if distributed: - if isinstance(runner, EpochBasedRunner): - runner.register_hook(DistSamplerSeedHook()) + if distributed and isinstance(runner, EpochBasedRunner): + runner.register_hook(DistSamplerSeedHook()) # register eval hooks if validate: @@ -764,14 +763,8 @@ def _train_segmentor( if ds_val is None: cfg_ds_val = cfg.data.get("val") - if cfg_ds_val is None: - raise Exception( - "Validation dataset is not specified even though validate = True. Please set validate = False or specify a validation dataset." - ) - elif cfg_ds_val.get("deeplake_path") is None: - raise Exception( - "Validation dataset is not specified even though validate = True. Please set validate = False or specify a validation dataset." 
- ) + if cfg_ds_val is None or cfg_ds_val.get("deeplake_path") is None: + raise ValidationDatasetMissingError() ds_val = load_ds_from_cfg(cfg.data.val) ds_val_tensors = cfg.data.val.get("deeplake_tensors", {}) @@ -779,13 +772,11 @@ def _train_segmentor( cfg_data = cfg.data.val.get("deeplake_path") if cfg_data is not None: always_warn( - "A Deep Lake dataset was specified in the cfg as well as inthe dataset input to train_segmentor. The dataset input to train_segmentor will be used in the workflow." + "A Deep Lake dataset was specified in the cfg as well as in the dataset input to train_segmentor. The dataset input to train_segmentor will be used in the workflow." ) if ds_val is None: - raise Exception( - "Validation dataset is not specified even though validate = True. Please set validate = False or specify a validation dataset." - ) + raise ValidationDatasetMissingError() if ds_val_tensors: val_images_tensor = ds_val_tensors["img"] @@ -871,8 +862,8 @@ def build_dataloader( dataset.CLASSES = classes pipeline = build_pipeline(pipeline) persistent_workers = train_loader_config.get("persistent_workers", False) - ignore_index = train_loader_config.get("ignore_index") - reduce_zero_label = train_loader_config.get("reduce_zero_label") + _ = train_loader_config.get("ignore_index") + _ = train_loader_config.get("reduce_zero_label") dist = train_loader_config["dist"] if dist and implementation == "python": raise NotImplementedError( diff --git a/deeplake/util/exceptions.py b/deeplake/util/exceptions.py index 0767ad1fb8..11fc7f1d7e 100644 --- a/deeplake/util/exceptions.py +++ b/deeplake/util/exceptions.py @@ -593,7 +593,7 @@ def __init__(self, tensor, sample=None): if print_item: msg += str(sample) + " " - elif print_path: + else: msg += f"at path '{sample.path}' " else: msg = f"Failed to append a sample " @@ -897,6 +897,7 @@ def __init__(self): "`ds.extend({'image_tensor': samples, 'label_tensor': samples})` or you need to call `extend` method of the required tensor, " "like: `ds.image_tensor.extend(samples)`" ) + super().__init__(message) class DatasetTooLargeToDelete(Exception): @@ -1143,9 +1144,23 @@ def __init__(self): super().__init__(msg) +class ValidationDatasetMissingError(Exception): + def __init__(self): + msg = ( + "Validation dataset is not specified even though validate = True. " + "Please set validate = False or specify a validation dataset." + ) + super().__init__(msg) + + class InvalidAuthContextError(Exception): def __init__( self, message: str = "Authentication failed due to invalid or insufficient configuration.", ): super().__init__(message) + + +class InvalidPandasDataframeError(Exception): + def __init__(self): + super().__init__("Source provided is not a valid pandas dataframe object") diff --git a/deeplake/util/version_control.py b/deeplake/util/version_control.py index ebe371d6ef..9230e703e9 100644 --- a/deeplake/util/version_control.py +++ b/deeplake/util/version_control.py @@ -412,7 +412,6 @@ def delete_branch( storage = dataset.storage storage.check_readonly() - # storage = dataset.storage version_state = dataset.version_state if version_state["branch"] == branch_name: raise VersionControlError( diff --git a/deeplake/util/video.py b/deeplake/util/video.py index 29dce68920..70ebc2e2cd 100644 --- a/deeplake/util/video.py +++ b/deeplake/util/video.py @@ -35,7 +35,7 @@ def normalize_index(index, nframes): start, stop = stop + 1, start + 1 elif isinstance(index, list): raise IndexError( - f"Cannot specify a list video frames. 
You must specify a range with an optional step such as [5:10] or [0:100:5]"
+            "Cannot specify a list of video frames. You must specify a range with an optional step such as [5:10] or [0:100:5]"
         )
     else:
         raise IndexError(
diff --git a/pyproject.toml b/pyproject.toml
index 4b0daa3703..08c6acea73 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,7 @@ numpy = [
     { version = "^1.24.0", python = "<3.12" },
     { version = "^1.26.0", python = ">=3.12" },
 ]
+botocore = "*"
 python = ">=3.8,<=3.13"
 pillow = "~10.4.0"
 boto3 = "*"
@@ -30,13 +31,24 @@
 lz4 = "*"
 pyjwt = "*"
 pydantic = "*"
 libdeeplake = { version = "0.0.146", markers = "python_version >= '3.8' and sys_platform != 'win32'" }
-
 av = { version = ">=8.1.0", markers = "python_version >= '3.7' or sys_platform != 'win32'" }
 aioboto3 = { version = ">=10.4.0", markers = "python_version >= '3.7' and sys_platform != 'win32'" }
+nest_asyncio = { version="*", markers = "python_version >= '3.7' and sys_platform != 'win32'" }
+google-cloud-storage = { version = "~1.42.0", optional = true }
+google-auth = { version = "~2.0.1", optional = true }
+google-auth-oauthlib = { version = "~0.4.5", optional = true }
+google-api-python-client = { version = "~2.31.0", optional = true }
+oauth2client = { version = "~4.1.3", optional = true }
 [tool.poetry.extras]
-gcp = ["google-cloud-storage", "google-auth", "google-auth-oauthlib"]
-av = ["av"]
+audio = ["av"]
+video = ["av"]
+gcp = [
+    "google-cloud-storage",
+    "google-auth",
+    "google-auth-oauthlib"
+]
+azure = ["azure-cli", "azure-identity", "azure-storage-blob"]
 dicom = ["pydicom", "nibabel"]
 medical = ["pydicom", "nibabel"]
 visualizer = ["IPython", "flask"]
@@ -68,9 +80,10 @@ all = [
     "pyjwt"
 ]

+
 [build-system]
-requires = ["poetry-core>=1.0.0"]
-build-backend = "poetry.core.masonry.api"
+requires = ["poetry-core>=1.0.0", "setuptools>=42"]
+build-backend = "setuptools.build_meta"

 [tool.coverage.run]
 relative_files = true
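For reviewers: a hedged sketch of how the new `InvalidPandasDataframeError` is expected to surface through the public `deeplake.ingest_dataframe` entry point. The destination path and the `pytest.raises` wrapper are illustrative, not taken from the updated test file, and the sketch assumes no earlier validation intercepts the bad `src` first.

```python
import pytest

import deeplake
from deeplake.util.exceptions import InvalidPandasDataframeError


def test_ingest_dataframe_rejects_non_dataframe(tmp_path):
    # A plain dict is not a pandas.DataFrame, so the typed error should be
    # raised instead of the bare Exception used previously.
    with pytest.raises(InvalidPandasDataframeError):
        deeplake.ingest_dataframe({"a": [1, 2, 3]}, str(tmp_path / "ds"))
```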
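Similarly, a sketch of the overwrite guard that `allow_delete_error_message` now describes in one place. It assumes the `Dataset.allow_delete` setter that the message refers to; the in-memory path is only for illustration.

```python
import deeplake
from deeplake.util.exceptions import DatasetHandlerError

path = "mem://guarded_ds"  # illustrative in-memory path
ds = deeplake.empty(path)
ds.allow_delete = False  # protect the dataset from deletes/overwrites

try:
    deeplake.empty(path, overwrite=True)  # refused while allow_delete is False
except DatasetHandlerError:
    ds.allow_delete = True  # opt back in, as the error message suggests
    ds = deeplake.empty(path, overwrite=True)
```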