Add check_path() #70

Merged · 4 commits · Jun 28, 2024

Changes from all commits
100 changes: 73 additions & 27 deletions tsdb/utils/file.py
@@ -15,6 +15,42 @@
from .logging import logger


+def check_path(
+    path: str,
+    check_exists: bool = False,
+) -> str:
+    """Check the given path and return the absolute path.
+
+    Parameters
+    ----------
+    path :
+        The path to be checked.
+
+    check_exists :
+        If True, check whether the path exists and raise an AssertionError if it does not.
+
+    Returns
+    -------
+    checked_path:
+        The absolute path of the given path.
+
+    """
+    # expand the home dir if the path starts with "~"
+    if path.startswith("~"):
+        checked_path = path.replace("~", os.path.expanduser("~"))
+    else:
+        checked_path = path
+
+    checked_path = os.path.abspath(checked_path)
+
+    if check_exists:
+        assert os.path.exists(
+            checked_path
+        ), f"The given path {checked_path} does not exist"
+
+    return checked_path
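
A quick sketch of how check_path() behaves; the paths below are hypothetical:

# illustration only: expands "~" and absolutizes the path
p = check_path("~/datasets/demo")  # e.g. "/home/user/datasets/demo"

# with check_exists=True, a missing path raises an AssertionError
try:
    check_path("~/no/such/dir", check_exists=True)
except AssertionError as err:
    print(err)  # "The given path /home/user/no/such/dir does not exist"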


def pickle_dump(data: object, path: str) -> Optional[str]:
"""Pickle the given object.

@@ -31,6 +67,9 @@ def pickle_dump(data: object, path: str) -> Optional[str]:
    `path` if the dump succeeds, else None

    """
+    # check the given path
+    path = check_path(path)
+
    try:
        with open(path, "wb") as f:
            pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
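
With the new check_path() call, pickle_dump() now accepts "~"-style paths directly; a hypothetical example:

# the file name here is made up for illustration
saved_path = pickle_dump({"key": "value"}, "~/tsdb_demo.pkl")  # returns the absolute path on success, None on failure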
@@ -55,6 +94,8 @@ def pickle_load(path: str) -> object:
        Pickled object.

    """
+    # check the given path
+    path = check_path(path, check_exists=True)
    try:
        with open(path, "rb") as f:
            data = pickle.load(f)
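
Because pickle_load() passes check_exists=True, a missing file now fails fast with a descriptive AssertionError rather than an error from open(). Continuing the hypothetical example above:

data = pickle_load("~/tsdb_demo.pkl")  # loads the dict dumped earlier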
@@ -76,9 +117,8 @@ def purge_path(path: str, ignore_errors: bool = True) -> None:
    Errors are ignored if ignore_errors is set.

    """
-    assert os.path.exists(
-        path
-    ), f"The given path {path} does not exist. Operation aborted."
+    # check the given path; no need to check existence because ignore_errors is set
+    path = check_path(path)

    try:
        if os.path.isdir(path):
@@ -98,18 +138,19 @@ def purge_path(path: str, ignore_errors: bool = True) -> None:


def determine_data_home():
+    # default path
+    default_path = check_path("~/.pypots/tsdb")
+
    # read data_home from the config file
+    # data_home may be changed by users, hence not necessarily equal to the default path
    config = read_configs()
    data_home_path = config.get("path", "data_home")
-    # replace '~' with the absolute path if existing in the path
-    data_home_path = data_home_path.replace("~", os.path.expanduser("~"))
+    data_home_path = check_path(data_home_path)

    # old cached dataset dir path used in TSDB v0.2
-    old_cached_dataset_dir_02 = os.path.join(
-        os.path.expanduser("~"), ".tsdb_cached_datasets"
-    )
+    old_cached_dataset_dir_02 = check_path("~/.tsdb_cached_datasets")
    # old cached dataset dir path used in TSDB v0.4
-    old_cached_dataset_dir_04 = os.path.join(os.path.expanduser("~"), ".tsdb")
+    old_cached_dataset_dir_04 = check_path("~/.tsdb")

    if os.path.exists(old_cached_dataset_dir_02) or os.path.exists(
        old_cached_dataset_dir_04
@@ -127,15 +168,18 @@ def determine_data_home():
        # use the path directly, may be in a portable disk
        cached_dataset_dir = data_home_path
    else:
-        # use the default path for initialization,
-        # e.g. `data_home_path` in a portable disk but the disk is not connected
-        default_path = os.path.join(os.path.expanduser("~"), ".pypots", "tsdb")
-        cached_dataset_dir = default_path
-        if os.path.abspath(data_home_path) != os.path.abspath(default_path):
+        # if the preset data_home path does not exist,
+        # e.g. `data_home_path` is in a portable disk that is not connected,
+        # then use the default path
+        if check_path(data_home_path) != check_path(default_path):
            logger.warning(
-                f"️ The preset data_home path '{data_home_path}' doesn't exist. "
-                f"Using the default path '{default_path}'"
+                f"️ The preset data_home {data_home_path} doesn't exist. "
+                f"This may be caused by the portable disk not being connected."
            )
+            logger.warning(f"‼️ Using the default path {default_path} for now")
+
+        cached_dataset_dir = default_path

    return cached_dataset_dir
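
The net effect: a user-configured data_home that exists on disk is used as-is; otherwise TSDB falls back to the default. A minimal sketch of that rule, assuming a hypothetical configured path:

configured = "/media/usb/tsdb_data"  # hypothetical value read from the config file
data_home = configured if os.path.exists(configured) else check_path("~/.pypots/tsdb")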


@@ -151,30 +195,29 @@ def migrate(old_path: str, new_path: str) -> None:
        The new path of the dataset.

    """
-    if not os.path.exists(old_path):
-        raise FileNotFoundError(f"Given old_path {old_path} does not exist.")
+    # check both old_path and new_path
+    old_path = check_path(old_path, check_exists=True)
+    new_path = check_path(new_path)

-    # create new_path if not exists
    if not os.path.exists(new_path):
+        # if new_path does not exist, just rename the old_path into it
        new_parent_dir = os.path.abspath(os.path.join(new_path, ".."))
        if not os.path.exists(new_parent_dir):
            os.makedirs(new_parent_dir, exist_ok=True)
        os.makedirs(new_path, exist_ok=True)
    else:
-        logger.warning(f"‼️ Note that new_path {new_path} already exists.")
+        logger.warning(f"‼️ Please note that new_path {new_path} already exists.")

+    # if new_path exists, we have to move everything from old_path into it
    all_old_files = os.listdir(old_path)
    for f in all_old_files:
        old_f_path = os.path.join(old_path, f)

        if os.path.isdir(old_f_path):
            new_f_path = os.path.join(new_path, f)
            shutil.copytree(old_f_path, new_f_path)
        else:
            shutil.move(old_f_path, new_path)
    shutil.rmtree(old_path, ignore_errors=True)

-    logger.info(
-        f"Successfully migrated {old_path} to {new_path}, and deleted {old_path}"
-    )
+    logger.info(f"Successfully migrated {old_path} to {new_path}")
+    logger.info(f"Purged the old path {old_path}")


def migrate_cache(target_path: str) -> None:
@@ -186,6 +229,9 @@ def migrate_cache(target_path: str) -> None:
        The new path for TSDB to store cached datasets.

    """
+    # check the target path
+    target_path = check_path(target_path)
+
    cached_dataset_dir = determine_data_home()
    migrate(cached_dataset_dir, target_path)
    config_parser = read_configs()
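
And a minimal sketch of relocating the whole cache; the target directory is hypothetical:

migrate_cache("/mnt/storage/tsdb_cache")  # TSDB will store cached datasets there from now on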