Skip to content

Commit

Permalink
Merge branch 'main' into python-version-check
Browse files Browse the repository at this point in the history
  • Loading branch information
nvoxland-al authored Sep 24, 2024
2 parents 17f4b77 + 80fda1c commit b712f6f
Show file tree
Hide file tree
Showing 48 changed files with 1,801 additions and 299 deletions.
4 changes: 3 additions & 1 deletion deeplake/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
ingest_kaggle = api_dataset.ingest_kaggle
ingest_dataframe = api_dataset.ingest_dataframe
ingest_huggingface = huggingface.ingest_huggingface
export_yolo = api_dataset.export_yolo
dataset = api_dataset.init # type: ignore
tensor = Tensor
random = DeeplakeRandom()
Expand All @@ -78,6 +79,7 @@
"ingest_kaggle",
"ingest_dataframe",
"ingest_huggingface",
"export_yolo",
"compressions",
"htypes",
"config",
Expand All @@ -89,7 +91,7 @@
]


__version__ = "3.9.19"
__version__ = "3.9.24"
warn_if_update_required(__version__)
__encoded_version__ = np.array(__version__)
config = {"s3": Config(max_pool_connections=50, connect_timeout=300, read_timeout=300)}
Expand Down
91 changes: 90 additions & 1 deletion deeplake/api/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from deeplake.auto.unstructured.kaggle import download_kaggle_dataset
from deeplake.auto.unstructured.image_classification import ImageClassification
from deeplake.auto.unstructured.coco.coco import CocoDataset
from deeplake.auto.unstructured.yolo.yolo import YoloDataset
from deeplake.auto.unstructured.yolo.yolo import YoloDataset, YoloExport
from deeplake.client.client import DeepLakeBackendClient
from deeplake.client.log import logger
from deeplake.core.dataset import Dataset, dataset_factory
Expand Down Expand Up @@ -2169,6 +2169,95 @@ def ingest_dataframe(

return ds # type: ignore

@staticmethod
def export_yolo(
    src: Union[str, pathlib.Path, Dataset],
    dest: Union[str, pathlib.Path],
    src_creds: Optional[Union[str, Dict]] = None,
    token: Optional[str] = None,
    progressbar: Optional[bool] = True,
    image_tensor: Optional[str] = None,
    label_tensor: Optional[str] = None,
    box_tensor: Optional[str] = None,
    limit: Optional[int] = None,
):
    """Export Deep Lake dataset as files in YOLO format. The dataset must contain 1 tensor with each of the following htypes: image, bbox, and class_label.

    Examples:
        >>> # Export a Deep Lake dataset in YOLO format from Deep Lake dataset path
        >>> deeplake.export_yolo("hub://<org_id>/<dataset_name>", "./path/to/export/directory")
        >>>
        >>> # Export a Deep Lake dataset in YOLO format from Deep Lake dataset view
        >>> ds = deeplake.load("hub://<org_id>/<dataset_name>")
        >>> view = ds[0:100]
        >>> deeplake.export_yolo(view, "./path/to/export/directory")
        >>>
        >>> # Export a Deep Lake dataset in YOLO format from a query view at a specific commit
        >>> ds = deeplake.load("hub://<org_id>/<dataset_name>")
        >>> ds.checkout(<commit_id>)
        >>> view = ds.query("select * where ....")
        >>> deeplake.export_yolo(view, "./path/to/export/directory")
        >>>

    Args:
        src (str, pathlib.Path, Dataset): The Deep Lake dataset to be exported to YOLO.
            - A Dataset or the full path to the dataset. Can be:
            - a Deep Lake cloud path of the form ``hub://org_id/datasetname``. To read from Deep Lake cloud datasets, ensure that you are authenticated to Deep Lake (pass in a token using the 'token' parameter).
            - an s3 path of the form ``s3://bucketname/path/to/dataset``. Credentials are required in either the environment or passed to the ``src_creds`` argument.
            - a local file system path of the form ``./path/to/dataset`` or ``~/path/to/dataset`` or ``path/to/dataset``.
            - a memory path of the form ``mem://path/to/dataset`` which doesn't save the dataset but keeps it in memory instead. Should be used only for testing as it does not persist.
        dest (str, pathlib.Path): Folder where the YOLO format files will be saved. It is created if it does not exist.
        src_creds (Optional[Union[str, Dict]]): Credentials to access the source data. If not provided, will be inferred from the environment.
        token (Optional[str]): The token to use for accessing the source dataset.
        progressbar (Optional[bool]): Enables or disables export progress bar. Set to ``True`` by default.
        image_tensor (Optional[str]): The name of the tensor containing the images.
        label_tensor (Optional[str]): The name of the tensor containing the labels.
        box_tensor (Optional[str]): The name of the tensor containing the bounding boxes.
        limit (Optional[int]): The maximum number of samples to export. Unlimited by default.

    Raises:
        DatasetCorruptError: If loading the source dataset fails with DatasetCorruptError.
    """

    feature_report_path(
        dest,
        "export_yolo",
        {},
        token=token,
    )

    os.makedirs(dest, exist_ok=True)

    # Check if directory is empty and warn if it's not
    if os.listdir(dest):
        logger.warning(
            f"Destination directory {dest} is not empty. Existing files are unknown and might cause issues when using the files in the destination directory for YOLO training."
        )

    if isinstance(src, (str, pathlib.Path)):
        # A path was given: open the dataset read-only, since exporting never modifies it.
        src = convert_pathlib_to_string_if_needed(src)
        try:
            src_ds = deeplake.load(
                src, read_only=True, creds=src_creds, token=token, verbose=False
            )
        except DatasetCorruptError as e:
            # Chain explicitly so the traceback shows the original failure as the
            # direct cause instead of an incidental "during handling" context.
            raise DatasetCorruptError(
                "The source dataset is corrupted.",
                "You can try to fix this by loading the dataset with `reset=True` "
                "which will attempt to reset uncommitted HEAD changes and load the previous version.",
                e.__cause__,
            ) from e
    else:
        # An already-loaded Dataset (or view) is exported as-is.
        src_ds = src

    export = YoloExport(
        src_ds, dest, box_tensor, label_tensor, image_tensor, progressbar, limit
    )

    export.export_data()

@staticmethod
@spinner
def query(query_string: str, token: Optional[str] = "") -> Dataset:
Expand Down
1 change: 1 addition & 0 deletions deeplake/api/tests/test_access_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def test_access_method(s3_ds_generator):


@pytest.mark.slow
@pytest.mark.flaky(reruns=3)
def test_access_method_with_creds(
hub_cloud_ds_generator, hub_cloud_dev_managed_creds_key
):
Expand Down
2 changes: 2 additions & 0 deletions deeplake/api/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1159,6 +1159,7 @@ def test_compressions_list():
"png",
"ppm",
"sgi",
"stl",
"tga",
"tiff",
"wav",
Expand Down Expand Up @@ -2129,6 +2130,7 @@ def test_text_label(local_ds_generator):

@pytest.mark.parametrize("scheduler", ["threaded", "processed"])
@pytest.mark.parametrize("num_workers", [0, 2])
@pytest.mark.flaky(retry_count=3)
def test_text_labels_transform(local_ds_generator, scheduler, num_workers):
with local_ds_generator() as ds:
ds.create_tensor("labels", htype="class_label")
Expand Down
32 changes: 31 additions & 1 deletion deeplake/api/tests/test_mesh.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import pytest

import deeplake
from deeplake.util.exceptions import DynamicTensorNumpyError
import numpy as np
from deeplake.util.exceptions import (
DynamicTensorNumpyError,
MeshTensorMetaMissingRequiredValue,
UnsupportedCompressionError,
)


def test_mesh(local_ds, mesh_paths):
Expand Down Expand Up @@ -31,3 +36,28 @@ def test_mesh(local_ds, mesh_paths):

tensor_data = tensor.data()
assert len(tensor_data) == 4


def test_stl_mesh(local_ds, stl_mesh_paths):
    """Check STL-compressed mesh tensors: creation errors, appends and reads."""
    stl_tensor = local_ds.create_tensor(
        "stl_mesh", htype="mesh", sample_compression="stl"
    )

    # Mesh tensors are expected to reject a missing or ``None`` sample compression.
    with pytest.raises(UnsupportedCompressionError):
        local_ds.create_tensor("unsupported", htype="mesh", sample_compression=None)

    with pytest.raises(MeshTensorMetaMissingRequiredValue):
        local_ds.create_tensor("unsupported", htype="mesh")

    # Every fixture file is appended twice, each time from a fresh read.
    for mesh_path in stl_mesh_paths.values():
        first_read = deeplake.read(mesh_path)
        stl_tensor.append(first_read)
        stl_tensor.append(deeplake.read(mesh_path))

    meshes = stl_tensor.numpy()
    assert meshes.shape == (4, 12, 3, 3)
    # All four samples must be identical; compare each one with its successor.
    for earlier, later in zip(meshes, meshes[1:]):
        assert np.all(earlier == later)

    all_data = stl_tensor.data()
    first_data = stl_tensor[0].data()
    assert np.all(all_data["vertices"][0] == first_data["vertices"])
1 change: 1 addition & 0 deletions deeplake/api/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def test_view_from_different_commit(local_ds):


@pytest.mark.slow
@pytest.mark.flaky(retry_count=3)
def test_save_view_ignore_errors(local_ds):
with local_ds as ds:
ds.create_tensor(
Expand Down
88 changes: 88 additions & 0 deletions deeplake/auto/tests/test_yolo_template.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,58 @@
import deeplake
import pytest
from deeplake.util.exceptions import IngestionError
import numpy as np
from click.testing import CliRunner


def create_yolo_export_dataset_basic():
    """Build an in-memory dataset with three samples for the YOLO export tests.

    The dataset has an ``images`` tensor (png-compressed images), a
    ``boxes_ltwh`` bbox tensor and a ``labels`` class_label tensor with three
    class names. The last sample has ``None`` for both boxes and labels.
    """
    image_shape = (100, 100, 3)

    # Random image content; only the shape and dtype are fixed.
    random_images = [
        np.random.randint(0, 255, image_shape, dtype=np.uint8) for _ in range(3)
    ]
    ltwh_boxes = [
        np.array([[0, 0, 50, 50], [25, 25, 75, 75]], dtype=np.float32),
        np.array([[10, 10, 20, 20]], dtype=np.float32),
        None,
    ]
    class_ids = [np.array([0, 1]), np.array([0]), None]

    ds = deeplake.empty("mem://dummy")
    with ds:
        ds.create_tensor("images", htype="image", sample_compression="png")
        ds.create_tensor("boxes_ltwh", htype="bbox")
        ds.create_tensor(
            "labels", htype="class_label", class_names=["class_1", "class_2", "class_3"]
        )

        ds.extend(
            {
                "images": random_images,
                "boxes_ltwh": ltwh_boxes,
                "labels": class_ids,
            }
        )

    return ds


def create_yolo_export_dataset_complex():
    """Extend the basic YOLO export dataset with a second bbox tensor.

    Adds ``boxes_ccwh``, a bbox tensor declared with pixel coordinates in
    ``ccwh`` mode, holding one entry per existing sample (the last is ``None``).
    """
    ds = create_yolo_export_dataset_basic()

    ccwh_boxes = [
        np.array([[25, 25, 50, 50], [50, 50, 75, 75]], dtype=np.float32),
        np.array([[50, 50, 20, 20]], dtype=np.float32),
        None,
    ]

    with ds:
        ds.create_tensor(
            "boxes_ccwh", htype="bbox", coords={"type": "pixel", "mode": "ccwh"}
        )
        ds.boxes_ccwh.extend(ccwh_boxes)

    return ds


@pytest.mark.parametrize("shuffle", [True, False])
Expand Down Expand Up @@ -176,3 +228,39 @@ def test_minimal_yolo_ingestion_with_linked_images(
assert "labels" in ds.tensors
assert len(ds.labels.info["class_names"]) > 0
assert ds.linked_images.htype == "link[image]"


def test_export_yolo_basic():
    """Basic test for export_yolo function to see if it runs without errors"""
    # Named ``test_*`` so that pytest's default discovery collects it.

    ds = create_yolo_export_dataset_basic()

    with CliRunner().isolated_filesystem():
        # Relative destination: the export lands inside the temporary working
        # directory instead of at the filesystem root.
        deeplake.export_yolo(ds, "basic")


def test_export_yolo_edge_cases():
    """Exercise export_yolo with explicit tensor names, a limit, and invalid datasets."""
    # Named ``test_*`` so that pytest's default discovery collects it.

    ds = create_yolo_export_dataset_complex()

    # Destinations are relative so every export stays inside the temporary
    # working directory created by ``isolated_filesystem``.
    with CliRunner().isolated_filesystem():
        deeplake.export_yolo(
            ds,
            "custom_boxes",
            box_tensor="boxes_ccwh",
            label_tensor="labels",
            image_tensor="images",
            limit=1,
        )

    # Check for error about correct tensors not being found
    ds_empty = deeplake.empty("mem://dummy")
    with pytest.raises(ValueError):
        with CliRunner().isolated_filesystem():
            deeplake.export_yolo(ds_empty, "no_tensors")

    # Check for error about class names not being present
    # NOTE(review): this dataset is empty, exactly like the one in the previous
    # check, so it presumably fails on the missing tensors rather than on missing
    # class names - confirm and build a dataset whose class_label tensor has no
    # class_names if that path is meant to be covered.
    ds_empty = deeplake.empty("mem://dummy")
    with pytest.raises(ValueError):
        with CliRunner().isolated_filesystem():
            deeplake.export_yolo(ds_empty, "no_class_names")
Loading

0 comments on commit b712f6f

Please sign in to comment.