From c88ab48b29886cc5db98337cd47393099aac1601 Mon Sep 17 00:00:00 2001 From: Roshan Pawar Date: Fri, 21 Jul 2023 13:43:33 -0700 Subject: [PATCH 1/8] fix: registry df + remote path bugs + registry column keyerror --- latch/registry/record.py | 3 +++ latch/registry/table.py | 37 +++++++++++++++++++++++++++++++++ latch/types/directory.py | 12 +++++++---- latch/types/file.py | 15 +++++++------- latch/types/utils.py | 45 ++++++++++++++++++++++++++++++++++++++++ requirements.in | 1 + requirements.txt | 8 ++++--- 7 files changed, 107 insertions(+), 14 deletions(-) diff --git a/latch/registry/record.py b/latch/registry/record.py index 5710c68d..d72129b4 100644 --- a/latch/registry/record.py +++ b/latch/registry/record.py @@ -162,6 +162,9 @@ def load(self) -> None: if not c.upstream_type["allowEmpty"]: vals[k] = InvalidValue("") + # prevent keyerrors when accessing columns that don't have a value + vals[k] = None + self._cache.values = vals # get_name diff --git a/latch/registry/table.py b/latch/registry/table.py index a3c9386d..cdfca802 100644 --- a/latch/registry/table.py +++ b/latch/registry/table.py @@ -21,6 +21,7 @@ import gql import graphql.language as l import graphql.language.parser as lp +import pandas as pd from latch_sdk_gql.execute import execute from latch_sdk_gql.utils import ( _GqlJsonValue, @@ -265,6 +266,42 @@ def list_records(self, *, page_size: int = 100) -> Iterator[Dict[str, Record]]: if len(page) > 0: yield page + @overload + def get_dataframe( + self, *, load_if_missing: Literal[True] = True + ) -> Optional[pd.DataFrame]: + ... + + @overload + def get_dataframe(self, *, load_if_missing: bool) -> Optional[pd.DataFrame]: + ... + + def get_dataframe(self, *, load_if_missing: bool = True) -> Optional[pd.DataFrame]: + """Get a pandas DataFrame of all records in this table. + + Returns: + DataFrame reperesenting all records in this table. + """ + + records = [] + for page in self.list_records(): + for record_id, record in page.items(): + full_record = record.get_values(load_if_missing=load_if_missing) + if full_record is not None: + full_record["Sample_Name"] = record.get_name( + load_if_missing=load_if_missing + ) + records.append(full_record) + + if len(records) == 0: + cols = self.get_columns(load_if_missing=load_if_missing) + if cols is None: + return None + + return pd.DataFrame(columns=list(cols.keys())) + + return pd.DataFrame(records) + @contextmanager def update(self, *, reload_on_commit: bool = True) -> Iterator["TableUpdate"]: """Start an update transaction. diff --git a/latch/types/directory.py b/latch/types/directory.py index 225a017e..2494aa10 100644 --- a/latch/types/directory.py +++ b/latch/types/directory.py @@ -16,7 +16,7 @@ from typing_extensions import Annotated from latch.types.file import LatchFile -from latch.types.utils import _is_valid_url +from latch.types.utils import _is_valid_url, format_path from latch_cli.services.cp.path_utils import normalize_path from latch_cli.utils import urljoins @@ -182,13 +182,17 @@ def remote_path(self) -> Optional[str]: def __repr__(self): if self.remote_path is None: - return f'LatchDir("{self.local_path}")' - return f'LatchDir("{self.path}", remote_path="{self.remote_path}")' + formatted_path = format_path(self.local_path) + return f'LatchDir("{formatted_path}")' + + formatted_path = format_path(self.remote_path) + return f'LatchDir("{self.path}", remote_path="{formatted_path}")' def __str__(self): if self.remote_path is None: return "LatchDir()" - return f'LatchDir("{self.remote_path}")' + formatted_path = format_path(self.remote_path) + return f'LatchDir("{formatted_path}")' LatchOutputDir = Annotated[ diff --git a/latch/types/file.py b/latch/types/file.py index 7ebe07e7..86114a14 100644 --- a/latch/types/file.py +++ b/latch/types/file.py @@ -1,4 +1,3 @@ -import re from os import PathLike from typing import Optional, Type, Union from urllib.parse import urlparse @@ -12,11 +11,9 @@ from latch_sdk_gql.execute import execute from typing_extensions import Annotated -from latch.types.utils import _is_valid_url +from latch.types.utils import _is_valid_url, format_path, is_absolute_node_path from latch_cli.services.cp.path_utils import normalize_path -is_absolute_node_path = re.compile(r"^(latch)?://\d+.node(/)?$") - class LatchFile(FlyteFile): """Represents a file object in the context of a task execution. @@ -133,13 +130,17 @@ def remote_path(self) -> Optional[str]: def __repr__(self): if self.remote_path is None: - return f'LatchFile("{self.local_path}")' - return f'LatchFile("{self.path}", remote_path="{self.remote_path}")' + formatted_path = format_path(self.local_path) + return f'LatchFile("{formatted_path}")' + + formatted_path = format_path(self.remote_path) + return f'LatchFile("{self.path}", remote_path="{formatted_path}")' def __str__(self): if self.remote_path is None: return "LatchFile()" - return f'LatchFile("{self.remote_path}")' + formatted_path = format_path(self.remote_path) + return f'LatchFile("{formatted_path}")' LatchOutputFile = Annotated[ diff --git a/latch/types/utils.py b/latch/types/utils.py index 50490331..f37dbdae 100644 --- a/latch/types/utils.py +++ b/latch/types/utils.py @@ -1,7 +1,11 @@ +import re from pathlib import Path from typing import Union from urllib.parse import urlparse +import gql +from latch_sdk_gql.execute import execute + def _is_valid_url(raw_url: Union[str, Path]) -> bool: """A valid URL (as a source or destination of a LatchFile) must: @@ -17,3 +21,44 @@ def _is_valid_url(raw_url: Union[str, Path]) -> bool: if parsed.path != "" and not parsed.path.startswith("/"): return False return True + + +is_absolute_node_path = re.compile(r"^(latch)?://(?P\d+).node(/)?$") + + +def format_path(path: str) -> str: + match = is_absolute_node_path.match(path) + + if not match: + return path + + node_id = match.group("node_id") + + data = execute( + gql.gql(""" + query ldataGetPathQ($id: BigInt!) { + ldataGetPath(argNodeId: $id) + ldataOwner(argNodeId: $id) + } + """), + {"id": node_id}, + ) + + raw_path = data["ldataGetPath"] + owner = data["ldataOwner"] + + if raw_path is None: + return path + + path_split = raw_path.split("/") + + if path_split[0] == "mount": + mount_name = path_split[1] + fpath = "/".join(path_split[2:]) + return f"latch://{mount_name}.mount/{fpath}" + + if path_split[0] == "account_root": + fpath = "/".join(path_split[2:]) + return f"latch://{owner}.account/{fpath}" + + return path diff --git a/requirements.in b/requirements.in index ab0fdb08..6212be2f 100644 --- a/requirements.in +++ b/requirements.in @@ -20,3 +20,4 @@ asyncssh==2.12.0 watchfiles==0.18.1 gql==3.4.0 aiohttp==3.8.4 +pandas==1.3.5 diff --git a/requirements.txt b/requirements.txt index 744ca357..11c93fd4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile requirements.in @@ -137,7 +137,7 @@ kubernetes==24.2.0 # lytekitplugins-pods lyteidl==0.2.0a0 # via lytekit -lytekit==0.14.10 +lytekit==0.14.13 # via # -r requirements.in # lytekitplugins-pods @@ -170,7 +170,9 @@ numpy==1.21.6 oauthlib==3.2.0 # via requests-oauthlib pandas==1.3.5 - # via lytekit + # via + # -r requirements.in + # lytekit paramiko==2.11.0 # via # -r requirements.in From 903ea38b06d153c0b4d430fdd988de8cfc23e6d5 Mon Sep 17 00:00:00 2001 From: Roshan Pawar Date: Fri, 21 Jul 2023 13:53:32 -0700 Subject: [PATCH 2/8] 3.10 requirements --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 11c93fd4..c73371c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile requirements.in @@ -137,7 +137,7 @@ kubernetes==24.2.0 # lytekitplugins-pods lyteidl==0.2.0a0 # via lytekit -lytekit==0.14.13 +lytekit==0.14.10 # via # -r requirements.in # lytekitplugins-pods From 3a4ace6fdf23bf56619482fd834235e739567e7f Mon Sep 17 00:00:00 2001 From: Roshan Pawar Date: Mon, 24 Jul 2023 12:35:13 -0700 Subject: [PATCH 3/8] pr comments --- latch/registry/table.py | 26 +++++++------------------- latch/types/file.py | 10 +++------- latch/types/utils.py | 3 +-- 3 files changed, 11 insertions(+), 28 deletions(-) diff --git a/latch/registry/table.py b/latch/registry/table.py index cdfca802..1cfd2fff 100644 --- a/latch/registry/table.py +++ b/latch/registry/table.py @@ -266,37 +266,25 @@ def list_records(self, *, page_size: int = 100) -> Iterator[Dict[str, Record]]: if len(page) > 0: yield page - @overload - def get_dataframe( - self, *, load_if_missing: Literal[True] = True - ) -> Optional[pd.DataFrame]: - ... - - @overload - def get_dataframe(self, *, load_if_missing: bool) -> Optional[pd.DataFrame]: - ... - - def get_dataframe(self, *, load_if_missing: bool = True) -> Optional[pd.DataFrame]: + def get_dataframe(self) -> pd.DataFrame: """Get a pandas DataFrame of all records in this table. Returns: - DataFrame reperesenting all records in this table. + DataFrame representing all records in this table. """ records = [] for page in self.list_records(): - for record_id, record in page.items(): - full_record = record.get_values(load_if_missing=load_if_missing) + for record in page.values(): + full_record = record.get_values() if full_record is not None: - full_record["Sample_Name"] = record.get_name( - load_if_missing=load_if_missing - ) + full_record["Name"] = record.get_name() records.append(full_record) if len(records) == 0: - cols = self.get_columns(load_if_missing=load_if_missing) + cols = self.get_columns() if cols is None: - return None + return pd.DataFrame() return pd.DataFrame(columns=list(cols.keys())) diff --git a/latch/types/file.py b/latch/types/file.py index 86114a14..05e45e49 100644 --- a/latch/types/file.py +++ b/latch/types/file.py @@ -130,17 +130,13 @@ def remote_path(self) -> Optional[str]: def __repr__(self): if self.remote_path is None: - formatted_path = format_path(self.local_path) - return f'LatchFile("{formatted_path}")' - - formatted_path = format_path(self.remote_path) - return f'LatchFile("{self.path}", remote_path="{formatted_path}")' + return f'LatchFile("{format_path(self.local_path)}")' + return f'LatchFile("{self.path}", remote_path="{format_path(self.remote_path)}")' def __str__(self): if self.remote_path is None: return "LatchFile()" - formatted_path = format_path(self.remote_path) - return f'LatchFile("{formatted_path}")' + return f'LatchFile("{format_path(self.remote_path)}")' LatchOutputFile = Annotated[ diff --git a/latch/types/utils.py b/latch/types/utils.py index f37dbdae..79892331 100644 --- a/latch/types/utils.py +++ b/latch/types/utils.py @@ -25,11 +25,10 @@ def _is_valid_url(raw_url: Union[str, Path]) -> bool: is_absolute_node_path = re.compile(r"^(latch)?://(?P\d+).node(/)?$") - def format_path(path: str) -> str: match = is_absolute_node_path.match(path) - if not match: + if match is None: return path node_id = match.group("node_id") From 50067fa2890d356e206c78d6498eddc00ec57690 Mon Sep 17 00:00:00 2001 From: Roshan Pawar Date: Wed, 2 Aug 2023 14:46:58 -0700 Subject: [PATCH 4/8] fix: make pandas optional dependency --- latch/registry/table.py | 17 +++++++++++------ setup.py | 3 +++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/latch/registry/table.py b/latch/registry/table.py index 1cfd2fff..fe74ab75 100644 --- a/latch/registry/table.py +++ b/latch/registry/table.py @@ -21,7 +21,6 @@ import gql import graphql.language as l import graphql.language.parser as lp -import pandas as pd from latch_sdk_gql.execute import execute from latch_sdk_gql.utils import ( _GqlJsonValue, @@ -266,13 +265,21 @@ def list_records(self, *, page_size: int = 100) -> Iterator[Dict[str, Record]]: if len(page) > 0: yield page - def get_dataframe(self) -> pd.DataFrame: + def get_dataframe(self): """Get a pandas DataFrame of all records in this table. Returns: DataFrame representing all records in this table. """ + try: + import pandas as pd + except ImportError: + raise ImportError( + "pandas needs to be installed to use get_dataframe. Install it with" + " `pip install pandas` or `pip install latch[pandas]`." + ) + records = [] for page in self.list_records(): for record in page.values(): @@ -617,10 +624,8 @@ def upsert_column( raise InvalidColumnTypeError( key, type, - ( - f"Enum value for {repr(f.name)} ({repr(f.value)}) is not a" - " string" - ), + f"Enum value for {repr(f.name)} ({repr(f.value)}) is not a" + " string", ) members.append(f.value) diff --git a/setup.py b/setup.py index 051325ab..d4ed0d5c 100644 --- a/setup.py +++ b/setup.py @@ -49,6 +49,9 @@ "websockets==11.0.3", "watchfiles==0.19.0", ], + extras_require={ + "pandas": ["pandas==2.0.3"], + }, classifiers=[ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", From 2973d593bd2f323d80e51d2c193a24516276d1e9 Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Thu, 26 Oct 2023 10:33:30 -0700 Subject: [PATCH 5/8] changelog Signed-off-by: Ayush Kamat --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc880085..4e87ec7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,12 @@ Types of changes # Latch SDK Changelog +## 2.36.4 - 2023-10-25 + +### Added + +* Added ability to get a pandas Dataframe from a registry table. + ## 2.36.3 - 2023-10-25 ### Fixed From 95e3a49e8295fac92604e018a874243b25b4b1c8 Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Thu, 26 Oct 2023 10:35:02 -0700 Subject: [PATCH 6/8] better pandas version req Signed-off-by: Ayush Kamat --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 44fbd4e0..3bfa4a97 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,7 @@ ], extras_require={ "snakemake": ["snakemake>=7.18.0, <7.30.2"], - "pandas": ["pandas==2.0.3"], + "pandas": ["pandas>=2.0.0"], }, classifiers=[ "Programming Language :: Python :: 3.8", From a70ff7f448d581139b452867b9890b01fa25a4dd Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Thu, 26 Oct 2023 11:40:28 -0700 Subject: [PATCH 7/8] fixies Signed-off-by: Ayush Kamat --- latch/types/directory.py | 8 ++++---- latch/types/file.py | 10 +++++----- latch/types/utils.py | 37 +++++++++++++++++++++++-------------- 3 files changed, 32 insertions(+), 23 deletions(-) diff --git a/latch/types/directory.py b/latch/types/directory.py index 28f1ea30..e9761774 100644 --- a/latch/types/directory.py +++ b/latch/types/directory.py @@ -17,7 +17,7 @@ from typing_extensions import Annotated from latch.types.file import LatchFile -from latch.types.utils import _is_valid_url, format_path +from latch.types.utils import _format_path, _is_valid_url from latch_cli.utils import urljoins from latch_cli.utils.path import normalize_path @@ -244,18 +244,18 @@ def remote_path(self) -> Optional[str]: def __repr__(self): if self.remote_path is None: - return f"LatchDir({repr(format_path(self.local_path))})" + return f"LatchDir({repr(_format_path(self.local_path))})" return ( f"LatchDir({repr(self.path)}," - f" remote_path={repr( format_path(self.remote_path))})" + f" remote_path={repr( _format_path(self.remote_path))})" ) def __str__(self): if self.remote_path is None: return "LatchDir()" - return f"LatchDir({format_path(self.remote_path)})" + return f"LatchDir({_format_path(self.remote_path)})" LatchOutputDir = Annotated[ diff --git a/latch/types/file.py b/latch/types/file.py index 737750d3..073dc42b 100644 --- a/latch/types/file.py +++ b/latch/types/file.py @@ -14,7 +14,7 @@ from latch_sdk_gql.execute import execute from typing_extensions import Annotated -from latch.types.utils import _is_valid_url, format_path, is_absolute_node_path +from latch.types.utils import _format_path, _is_absolute_node_path, _is_valid_url from latch_cli.utils.path import normalize_path @@ -94,7 +94,7 @@ def downloader(): and ctx.inspect_objects_only is False ): local_path_hint = self._remote_path - if is_absolute_node_path.match(self._remote_path) is not None: + if _is_absolute_node_path.match(self._remote_path) is not None: data = execute( gql.gql(""" query getName($argPath: String!) { @@ -154,17 +154,17 @@ def remote_path(self) -> Optional[str]: def __repr__(self): if self.remote_path is None: - return f"LatchFile({repr(format_path(self.local_path))})" + return f"LatchFile({repr(_format_path(self.local_path))})" return ( f"LatchFile({repr(self.path)}," - f" remote_path={repr(format_path(self.remote_path))})" + f" remote_path={repr(_format_path(self.remote_path))})" ) def __str__(self): if self.remote_path is None: return "LatchFile()" - return f"LatchFile({format_path(self.remote_path)})" + return f"LatchFile({_format_path(self.remote_path)})" LatchOutputFile = Annotated[ diff --git a/latch/types/utils.py b/latch/types/utils.py index 79892331..ed62c69f 100644 --- a/latch/types/utils.py +++ b/latch/types/utils.py @@ -1,6 +1,6 @@ import re from pathlib import Path -from typing import Union +from typing import Optional, Union from urllib.parse import urlparse import gql @@ -23,10 +23,13 @@ def _is_valid_url(raw_url: Union[str, Path]) -> bool: return True -is_absolute_node_path = re.compile(r"^(latch)?://(?P\d+).node(/)?$") +_is_absolute_node_path = re.compile(r"^(latch)?://(?P\d+).node(/)?$") -def format_path(path: str) -> str: - match = is_absolute_node_path.match(path) +_old_path_expr = re.compile(r"^(?:(?Paccount_root)|(?Pmount))") + + +def _format_path(path: str) -> str: + match = _is_absolute_node_path.match(path) if match is None: return path @@ -43,21 +46,27 @@ def format_path(path: str) -> str: {"id": node_id}, ) - raw_path = data["ldataGetPath"] - owner = data["ldataOwner"] - + raw_path: Optional[str] = data["ldataGetPath"] if raw_path is None: return path path_split = raw_path.split("/") - if path_split[0] == "mount": - mount_name = path_split[1] - fpath = "/".join(path_split[2:]) - return f"latch://{mount_name}.mount/{fpath}" + match = _old_path_expr.match(raw_path) + if match is None: + return path + + if match["mount"] is not None: + bucket = path_split[1] + key = "/".join(path_split[2:]) + return f"latch://{bucket}.mount/{key}" + + owner: Optional[str] = data["ldataOwner"] + if owner is None: + return path - if path_split[0] == "account_root": - fpath = "/".join(path_split[2:]) - return f"latch://{owner}.account/{fpath}" + if match["account_root"] is not None: + key = "/".join(path_split[2:]) + return f"latch://{owner}.account/{key}" return path From f86459caf385ad63327850cc7c4e5ce33b50d4d7 Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Thu, 26 Oct 2023 12:49:11 -0700 Subject: [PATCH 8/8] rename some things Signed-off-by: Ayush Kamat --- latch/types/directory.py | 10 +++++----- latch/types/file.py | 12 ++++++------ latch/types/glob.py | 4 ++-- latch/types/utils.py | 26 ++++++++++++-------------- 4 files changed, 25 insertions(+), 27 deletions(-) diff --git a/latch/types/directory.py b/latch/types/directory.py index e9761774..41b72c49 100644 --- a/latch/types/directory.py +++ b/latch/types/directory.py @@ -17,7 +17,7 @@ from typing_extensions import Annotated from latch.types.file import LatchFile -from latch.types.utils import _format_path, _is_valid_url +from latch.types.utils import format_path, is_valid_url from latch_cli.utils import urljoins from latch_cli.utils.path import normalize_path @@ -112,7 +112,7 @@ def __init__( self._path_generated = False - if _is_valid_url(self.path) and remote_path is None: + if is_valid_url(self.path) and remote_path is None: self._remote_directory = self.path else: self._remote_directory = None if remote_path is None else str(remote_path) @@ -244,18 +244,18 @@ def remote_path(self) -> Optional[str]: def __repr__(self): if self.remote_path is None: - return f"LatchDir({repr(_format_path(self.local_path))})" + return f"LatchDir({repr(format_path(self.local_path))})" return ( f"LatchDir({repr(self.path)}," - f" remote_path={repr( _format_path(self.remote_path))})" + f" remote_path={repr( format_path(self.remote_path))})" ) def __str__(self): if self.remote_path is None: return "LatchDir()" - return f"LatchDir({_format_path(self.remote_path)})" + return f"LatchDir({format_path(self.remote_path)})" LatchOutputDir = Annotated[ diff --git a/latch/types/file.py b/latch/types/file.py index 073dc42b..ae334f49 100644 --- a/latch/types/file.py +++ b/latch/types/file.py @@ -14,7 +14,7 @@ from latch_sdk_gql.execute import execute from typing_extensions import Annotated -from latch.types.utils import _format_path, _is_absolute_node_path, _is_valid_url +from latch.types.utils import format_path, is_absolute_node_path, is_valid_url from latch_cli.utils.path import normalize_path @@ -75,7 +75,7 @@ def __init__( self._path_generated = False - if _is_valid_url(self.path) and remote_path is None: + if is_valid_url(self.path) and remote_path is None: self._remote_path = str(path) else: self._remote_path = None if remote_path is None else str(remote_path) @@ -94,7 +94,7 @@ def downloader(): and ctx.inspect_objects_only is False ): local_path_hint = self._remote_path - if _is_absolute_node_path.match(self._remote_path) is not None: + if is_absolute_node_path.match(self._remote_path) is not None: data = execute( gql.gql(""" query getName($argPath: String!) { @@ -154,17 +154,17 @@ def remote_path(self) -> Optional[str]: def __repr__(self): if self.remote_path is None: - return f"LatchFile({repr(_format_path(self.local_path))})" + return f"LatchFile({repr(format_path(self.local_path))})" return ( f"LatchFile({repr(self.path)}," - f" remote_path={repr(_format_path(self.remote_path))})" + f" remote_path={repr(format_path(self.remote_path))})" ) def __str__(self): if self.remote_path is None: return "LatchFile()" - return f"LatchFile({_format_path(self.remote_path)})" + return f"LatchFile({format_path(self.remote_path)})" LatchOutputFile = Annotated[ diff --git a/latch/types/glob.py b/latch/types/glob.py index ae9c8624..1dbee948 100644 --- a/latch/types/glob.py +++ b/latch/types/glob.py @@ -2,7 +2,7 @@ from typing import List, Optional from latch.types.file import LatchFile -from latch.types.utils import _is_valid_url +from latch.types.utils import is_valid_url def file_glob( @@ -41,7 +41,7 @@ def task(): """ - if not _is_valid_url(remote_directory): + if not is_valid_url(remote_directory): return [] if target_dir is None: diff --git a/latch/types/utils.py b/latch/types/utils.py index ed62c69f..dbe3376b 100644 --- a/latch/types/utils.py +++ b/latch/types/utils.py @@ -7,7 +7,7 @@ from latch_sdk_gql.execute import execute -def _is_valid_url(raw_url: Union[str, Path]) -> bool: +def is_valid_url(raw_url: Union[str, Path]) -> bool: """A valid URL (as a source or destination of a LatchFile) must: * contain a latch or s3 scheme * contain an absolute path @@ -23,14 +23,12 @@ def _is_valid_url(raw_url: Union[str, Path]) -> bool: return True -_is_absolute_node_path = re.compile(r"^(latch)?://(?P\d+).node(/)?$") +is_absolute_node_path = re.compile(r"^(latch)?://(?P\d+).node(/)?$") +old_style_path = re.compile(r"^(?:(?Paccount_root)|(?Pmount))") -_old_path_expr = re.compile(r"^(?:(?Paccount_root)|(?Pmount))") - - -def _format_path(path: str) -> str: - match = _is_absolute_node_path.match(path) +def format_path(path: str) -> str: + match = is_absolute_node_path.match(path) if match is None: return path @@ -46,19 +44,19 @@ def _format_path(path: str) -> str: {"id": node_id}, ) - raw_path: Optional[str] = data["ldataGetPath"] - if raw_path is None: + raw: Optional[str] = data["ldataGetPath"] + if raw is None: return path - path_split = raw_path.split("/") + parts = raw.split("/") - match = _old_path_expr.match(raw_path) + match = old_style_path.match(raw) if match is None: return path if match["mount"] is not None: - bucket = path_split[1] - key = "/".join(path_split[2:]) + bucket = parts[1] + key = "/".join(parts[2:]) return f"latch://{bucket}.mount/{key}" owner: Optional[str] = data["ldataOwner"] @@ -66,7 +64,7 @@ def _format_path(path: str) -> str: return path if match["account_root"] is not None: - key = "/".join(path_split[2:]) + key = "/".join(parts[2:]) return f"latch://{owner}.account/{key}" return path