diff --git a/cmflib/bin/cmf_dvc_ingest.py b/cmflib/bin/cmf_dvc_ingest.py index ff8bc3c1..0a92987b 100644 --- a/cmflib/bin/cmf_dvc_ingest.py +++ b/cmflib/bin/cmf_dvc_ingest.py @@ -63,6 +63,7 @@ def get_cmf_hierarchy(execution_lineage:str): execution_exist : True if it exeist, False otherwise metawrite: cmf object """ +tracked = {} #Used to keep a record of files tracked by outs and therefore not needed to be tracked in deps def ingest_metadata(execution_lineage:str, metadata:dict, metawriter:cmf.Cmf, command:str = "") : pipeline_name, context_name, execution = get_cmf_hierarchy(execution_lineage) @@ -77,9 +78,12 @@ def ingest_metadata(execution_lineage:str, metadata:dict, metawriter:cmf.Cmf, co if k == "deps": for dep in v: metawriter.log_dataset_with_version(dep["path"], dep["md5"], "input") + if dep["path"] not in tracked: + metawriter.log_dataset(dep["path"], 'input') if k == "outs": for out in v: metawriter.log_dataset_with_version(out["path"], out["md5"], "output") + tracked[out["path"]] = True def find_location(string, elements): for index, element in enumerate(elements): diff --git a/cmflib/dvc_wrapper.py b/cmflib/dvc_wrapper.py index e3af9ced..20262b1a 100644 --- a/cmflib/dvc_wrapper.py +++ b/cmflib/dvc_wrapper.py @@ -1,5 +1,5 @@ ### -# Copyright (2022) Hewlett Packard Enterprise Development LP +# Copyright (2024) Hewlett Packard Enterprise Development LP # # Licensed under the Apache License, Version 2.0 (the "License"); # You may not use this file except in compliance with the License. @@ -74,6 +74,12 @@ def dvc_get_url(folder: str, retry: bool = False, repo: str = "") -> str: url = dvc_get_url(folder, True) else: print(f"dvc.exceptions.PathMissingError Caught Unexpected {err}, {type(err)}") + except dvc.exceptions.OutputNotFoundError as err: + if not retry: + filename = folder.split('/')[-1] + folder = os.path.join(os.getcwd() , filename) + url = dvc_get_url(folder, True) + except Exception as err: print(f"Unexpected {err}, {type(err)}") return url @@ -137,6 +143,7 @@ def git_checkout_new_branch(branch_name: str): except Exception as err: process.kill() outs, errs = process.communicate() + print(f"Unexpected {err}, {type(err)}") print(f"Unexpected {outs}") print(f"Unexpected {errs}") @@ -230,14 +237,19 @@ def commit_output(folder: str, execution_id: str) -> str: commit = "" process = "" try: - process = subprocess.Popen(['dvc', 'add', folder], - stdout=subprocess.PIPE, - universal_newlines=True) - + if os.path.exists(os.getcwd() + '/' + folder): + process = subprocess.Popen(['dvc', 'add', folder], + stdout=subprocess.PIPE, + universal_newlines=True) + else: + process = subprocess.Popen(['dvc', 'import-url', '--to-remote', folder], + stdout=subprocess.PIPE, + universal_newlines=True) + # To-Do : Parse the output and report if error output, errs = process.communicate() commit = output.strip() - process = subprocess.Popen(['git', 'add', folder + '.dvc'], + process = subprocess.Popen(['git', 'add', folder.split('/')[-1] + '.dvc'], stdout=subprocess.PIPE, universal_newlines=True) # To-Do : Parse the output and report if error