Skip to content

Commit

Permalink
order dataset in list_datasets (#665)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mattseddon authored Dec 6, 2024
1 parent 7c9d193 commit 7c186ac
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 2 deletions.
5 changes: 3 additions & 2 deletions src/datachain/data_storage/metastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,9 +725,10 @@ def _base_list_datasets_query(self):

def list_datasets(self) -> Iterator["DatasetListRecord"]:
    """
    Lists all datasets in a deterministic order.

    The base listing query is sorted by dataset name and then by version
    before it is executed, so callers receive each dataset's versions
    grouped together and ascending.

    Yields:
        The records produced by ``self._parse_dataset_list`` from the rows
        of the executed, ordered query.
    """
    # Without an explicit ORDER BY the row order is left to the database,
    # so sort on (name, version) to make the listing stable.
    ordered_query = self._base_list_datasets_query().order_by(
        self._datasets.c.name,
        self._datasets_versions.c.version,
    )
    rows = self.db.execute(ordered_query)
    yield from self._parse_dataset_list(rows)

def list_datasets_by_prefix(
self, prefix: str, conn=None
Expand Down
32 changes: 32 additions & 0 deletions tests/func/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,38 @@ def test_dataset_stats(test_session):
assert dataset_version2.size == 18


def test_ls_datasets_ordered(test_session):
    """Check that ``ls_datasets`` lists datasets by name, versions ascending."""
    ids = [1, 2, 3]
    path_and_size = tuple(zip(["a", "b", "c"], ids))

    # The catalog must report no datasets before anything is saved.
    assert not list(test_session.catalog.ls_datasets())

    chain = DataChain.from_values(
        ids=ids,
        file=[File(path=path, size=size) for path, size in path_and_size],
        session=test_session,
    )
    # Interleave the saves so creation order differs from the expected
    # (name, version) order: "dogs" is saved between versions of "cats".
    for dataset_name in ("cats", "dogs", "cats", "cats", "cats"):
        chain.save(dataset_name)

    listed = list(test_session.catalog.ls_datasets())

    # Datasets whose name starts with "session_" are excluded from the check
    # (presumably session-scoped datasets -- confirm against the catalog).
    visible = [
        dataset for dataset in listed if not dataset.name.startswith("session_")
    ]
    observed = [
        (dataset.name, version.version)
        for dataset in visible
        for version in dataset.versions
    ]

    expected = [
        ("cats", 1),
        ("cats", 2),
        ("cats", 3),
        ("cats", 4),
        ("dogs", 1),
    ]
    assert observed == expected


def test_ls_datasets_no_json(test_session):
ids = [1, 2, 3]
values = tuple(zip(["a", "b", "c"], [1, 2, 3]))
Expand Down

0 comments on commit 7c186ac

Please sign in to comment.