From 9f58cade3e3c7ca572be839af7e0db6ec5848c1e Mon Sep 17 00:00:00 2001 From: AyeshaSanadi Date: Tue, 15 Oct 2024 12:34:32 +0530 Subject: [PATCH 01/41] Done git commit automatically, and Chage code to modify initialized new repo --- cmflib/cli/parser.py | 5 +- cmflib/commands/init/amazonS3.py | 5 + cmflib/commands/init/local.py | 4 + cmflib/commands/init/minioS3.py | 5 + cmflib/commands/repo/__init__.py | 42 ++++++++ cmflib/commands/repo/push.py | 98 +++++++++++++++++++ cmflib/dvc_wrapper.py | 20 ++++ examples/example-get-started/src/featurize.py | 1 + examples/example-get-started/src/parse.py | 1 + examples/example-get-started/src/test.py | 1 + examples/example-get-started/src/train.py | 1 + .../example-get-started/test-data-slice.py | 2 + 12 files changed, 182 insertions(+), 3 deletions(-) create mode 100644 cmflib/commands/repo/__init__.py create mode 100644 cmflib/commands/repo/push.py diff --git a/cmflib/cli/parser.py b/cmflib/cli/parser.py index b71e5233..2cea2ad6 100644 --- a/cmflib/cli/parser.py +++ b/cmflib/cli/parser.py @@ -20,11 +20,10 @@ import os import sys -from cmflib.commands import artifact, metadata, init - +from cmflib.commands import artifact, metadata, init, repo from cmflib.cli import CmfParserError -COMMANDS = [artifact, metadata, init] +COMMANDS = [artifact, metadata, init, repo] def _find_parser(parser, cmd_cls): diff --git a/cmflib/commands/init/amazonS3.py b/cmflib/commands/init/amazonS3.py index d12e6487..d009494b 100644 --- a/cmflib/commands/init/amazonS3.py +++ b/cmflib/commands/init/amazonS3.py @@ -30,6 +30,7 @@ dvc_quiet_init, dvc_add_remote_repo, dvc_add_attribute, + git_modify_remote_url, ) from cmflib.utils.cmf_config import CmfConfig from cmflib.utils.helper_functions import is_git_repo @@ -76,6 +77,10 @@ def run(self): git_initial_commit() git_add_remote(self.args.git_remote_url) print("git init complete.") + else: + git_modify_remote_url(self.args.git_remote_url) + print("git init complete.") + print("Starting cmf init.") dvc_quiet_init() diff --git a/cmflib/commands/init/local.py b/cmflib/commands/init/local.py index 6bed0f0f..a03f2e0a 100644 --- a/cmflib/commands/init/local.py +++ b/cmflib/commands/init/local.py @@ -30,6 +30,7 @@ dvc_quiet_init, dvc_add_remote_repo, dvc_add_attribute, + git_modify_remote_url, ) from cmflib.utils.cmf_config import CmfConfig from cmflib.utils.helper_functions import is_git_repo @@ -77,6 +78,9 @@ def run(self): git_initial_commit() git_add_remote(self.args.git_remote_url) print("git init complete.") + else: + git_modify_remote_url(self.args.git_remote_url) + print("git init complete.") print("Starting cmf init.") dvc_quiet_init() diff --git a/cmflib/commands/init/minioS3.py b/cmflib/commands/init/minioS3.py index 371da9a8..6a97a2bf 100644 --- a/cmflib/commands/init/minioS3.py +++ b/cmflib/commands/init/minioS3.py @@ -30,6 +30,7 @@ dvc_quiet_init, dvc_add_remote_repo, dvc_add_attribute, + git_modify_remote_url, ) from cmflib.utils.cmf_config import CmfConfig from cmflib.utils.helper_functions import is_git_repo @@ -76,6 +77,10 @@ def run(self): git_initial_commit() git_add_remote(self.args.git_remote_url) print("git init complete.") + else: + git_modify_remote_url(self.args.git_remote_url) + print("git init complete.") + print("Starting cmf init.") dvc_quiet_init() diff --git a/cmflib/commands/repo/__init__.py b/cmflib/commands/repo/__init__.py new file mode 100644 index 00000000..6d26f58f --- /dev/null +++ b/cmflib/commands/repo/__init__.py @@ -0,0 +1,42 @@ +### +# Copyright (2023) Hewlett Packard Enterprise Development LP +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +### + +import argparse + +from cmflib.commands.repo import push +from cmflib.cli.utils import * + +SUB_COMMANDS = [push] + +# This parser adds positional arguments to the main parser +def add_parser(subparsers, parent_parser): + REPO_HELP = "Command for repo push." + + metadata_parser = subparsers.add_parser( + "repo", + parents=[parent_parser], + description="Command repo push.", + help=REPO_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + metadata_subparsers = metadata_parser.add_subparsers( + dest="cmd", help="Use `cmf repo CMD --help` for " "command-specific help." + ) + + fix_subparsers(metadata_subparsers) + for cmd in SUB_COMMANDS: + cmd.add_parser(metadata_subparsers, parent_parser) diff --git a/cmflib/commands/repo/push.py b/cmflib/commands/repo/push.py new file mode 100644 index 00000000..31ff80f4 --- /dev/null +++ b/cmflib/commands/repo/push.py @@ -0,0 +1,98 @@ +### +# Copyright (2024) Hewlett Packard Enterprise Development LP +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +### + +#!/usr/bin/env python3 +import argparse +import os +import subprocess +import requests + +from cmflib.cli.command import CmdBase +from cmflib.dvc_wrapper import dvc_get_config, git_get_repo, git_checkout_new_branch + + +class CmdRepoPush(CmdBase): + def branch_exists(self, repo_own, repo_name, branch_name): + url = f"https://api.github.com/repos/{repo_own}/{repo_name}/branches/{branch_name}" + res = requests.get(url) + + if res.status_code == 200: + return True + return False + + + def run_command(self, command, cwd=None): + process = subprocess.Popen(command, cwd=cwd, shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout, stderr = process.communicate() + return (stdout.decode('utf-8').strip() if stdout else '', + stderr.decode('utf-8').strip() if stderr else '', + process.returncode) + + + def run(self): + msg = "'cmf' is not configured.\nExecute 'cmf init' command." + result = dvc_get_config() + if len(result) == 0: + return msg + else: + # checking if the current branch is cmf_origin or not + if "mlmd" in self.run_command("git branch")[0]: + url = git_get_repo() + if self.branch_exists(url.split("/")[-2], url.split("/")[-1], "mlmd"): + # pull the code + # push the code + stdout, stderr, returncode = self.run_command("git pull cmf_origin mlmd") + # print(returncode+"1") + if returncode != 0: + return f"Error pulling changes: {stderr}" + + stdout, stderr, returncode = self.run_command("git push -u cmf_origin mlmd") + if returncode != 0: + return f"Error pushing changes: {stderr}" + + return "Successfully pushed and pulled changes!" + else: + # push the code + stdout, stderr, returncode = self.run_command("git push -u cmf_origin mlmd") + if returncode != 0: + return f"Error pushing changes: {stderr}" + return "Successfully pushed and pulled changes!" + else: + if self.args.file_name: + git_checkout_new_branch(self.args.file_name) + else: + git_checkout_new_branch("mlmd") + return "Checking out new branch" + + +def add_parser(subparsers, parent_parser): + PUSH_HELP = "Push user-generated mlmd to server to create one single mlmd file for all the pipelines." + + parser = subparsers.add_parser( + "push", + parents=[parent_parser], + description="Push user's mlmd to cmf-server.", + help=PUSH_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + parser.add_argument( + "-f", "--file_name", help="Specify mlmd file name.", metavar="" + ) + + parser.set_defaults(func=CmdRepoPush) diff --git a/cmflib/dvc_wrapper.py b/cmflib/dvc_wrapper.py index 0fcf1d6e..b68a35bf 100644 --- a/cmflib/dvc_wrapper.py +++ b/cmflib/dvc_wrapper.py @@ -472,3 +472,23 @@ def dvc_push(file_list: Optional[List[str]] = None) -> str: print(f"Unexpected {outs}") print(f"Unexpected {errs}") return commit + + +# Change the existing remote repo url +def git_modify_remote_url(git_url) -> str: + commit = "" + try: + process = subprocess.Popen(['git', 'remote', 'set-url', 'cmf_origin', f"{git_url}"], + stdout=subprocess.PIPE, + universal_newlines=True) + output, errs = process.communicate(timeout=60) + commit = output.strip() + + except Exception as err: + print(f"Unexpected {err}, {type(err)}") + if isinstance(object, subprocess.Popen): + process.kill() + outs, errs = process.communicate() + print(f"Unexpected {outs}") + print(f"Unexpected {errs}") + return commit diff --git a/examples/example-get-started/src/featurize.py b/examples/example-get-started/src/featurize.py index 3a9e594c..448471e2 100644 --- a/examples/example-get-started/src/featurize.py +++ b/examples/example-get-started/src/featurize.py @@ -108,6 +108,7 @@ def featurize(input_dir: str, output_dir: str) -> None: _ = metawriter.log_dataset(output_ds.train, "output") _ = metawriter.log_dataset(output_ds.test, "output") + metawriter.finalize() @click.command() diff --git a/examples/example-get-started/src/parse.py b/examples/example-get-started/src/parse.py index 87992ce4..51e4f195 100644 --- a/examples/example-get-started/src/parse.py +++ b/examples/example-get-started/src/parse.py @@ -77,6 +77,7 @@ def parse(input_file: str, output_dir: str) -> None: _ = metawriter.log_dataset(output_ds.train, "output") _ = metawriter.log_dataset(output_ds.test, "output") + metawriter.finalize() @click.command() diff --git a/examples/example-get-started/src/test.py b/examples/example-get-started/src/test.py index ae55e032..5718087f 100644 --- a/examples/example-get-started/src/test.py +++ b/examples/example-get-started/src/test.py @@ -58,6 +58,7 @@ def test(model_dir: str, dataset_dir: str, output_dir: str) -> None: model_name="RandomForest_default" ) _ = metawriter.log_dataset(artifacts.dataset, "input") + metawriter.finalize() with open(artifacts.model, "rb") as fd: model = pickle.load(fd) diff --git a/examples/example-get-started/src/train.py b/examples/example-get-started/src/train.py index eb456e14..9a4d5e7d 100644 --- a/examples/example-get-started/src/train.py +++ b/examples/example-get-started/src/train.py @@ -72,6 +72,7 @@ def train(input_dir: str, output_dir: str) -> None: path=model_file, event="output", model_framework="SKlearn", model_type="RandomForestClassifier", model_name="RandomForestClassifier:default" ) + metawriter.finalize() @click.command() diff --git a/examples/example-get-started/test-data-slice.py b/examples/example-get-started/test-data-slice.py index a25d1c95..5b4692e7 100644 --- a/examples/example-get-started/test-data-slice.py +++ b/examples/example-get-started/test-data-slice.py @@ -87,3 +87,5 @@ def generate_dataset(): for label, content in df.iterrows(): if label == record: print(content) + +metawriter.finalize() \ No newline at end of file From ea66a3f03a4b944764caad51856383fe6bbae192 Mon Sep 17 00:00:00 2001 From: AyeshaSanadi Date: Fri, 18 Oct 2024 10:51:35 +0530 Subject: [PATCH 02/41] Added code for cmf repo push/pull --- cmflib/cli/__init__.py | 2 + cmflib/cli/parser.py | 4 +- cmflib/commands/artifact/pull.py | 5 + cmflib/commands/artifact/push.py | 3 + cmflib/commands/metadata/pull.py | 4 + cmflib/commands/metadata/push.py | 3 + cmflib/commands/repo/__init__.py | 4 +- cmflib/commands/repo/pull.py | 130 ++++++++++++++++++++++++ cmflib/commands/repo/push.py | 166 ++++++++++++++++++++++++------- 9 files changed, 280 insertions(+), 41 deletions(-) create mode 100644 cmflib/commands/repo/pull.py diff --git a/cmflib/cli/__init__.py b/cmflib/cli/__init__.py index 58e21183..09419332 100644 --- a/cmflib/cli/__init__.py +++ b/cmflib/cli/__init__.py @@ -53,6 +53,7 @@ def main(argv=None): try: args = parse_args(argv) cmd = args.func(args) + print(cmd) msg = cmd.do_run() print(msg) except CmfParserError: @@ -61,3 +62,4 @@ def main(argv=None): print("Interrupted by the user") except Exception as e: print(e) + return diff --git a/cmflib/cli/parser.py b/cmflib/cli/parser.py index dc59407d..80a3265e 100644 --- a/cmflib/cli/parser.py +++ b/cmflib/cli/parser.py @@ -17,11 +17,11 @@ """Main parser for the cmf cli""" import argparse -from cmflib.commands import artifact, metadata, init, execution, pipeline +from cmflib.commands import artifact, metadata, init, execution, pipeline, repo from cmflib.cli import CmfParserError -COMMANDS = [artifact, metadata, init, execution, pipeline] +COMMANDS = [artifact, metadata, init, execution, pipeline, repo] def _find_parser(parser, cmd_cls): diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index 0cbdded0..cf154f58 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -32,6 +32,9 @@ class CmdArtifactPull(CmdBase): + def __init__(self, args): + self.args = args + def split_url_pipeline(self, url: str, pipeline_name: str): # This function takes url and pipeline_name as a input parameter # return string which contains the artifact repo path of the artifact @@ -158,6 +161,7 @@ def search_artifact(self, input_dict): pass def run(self): + print("inside artifact pull run command") # check whether the mlmd file exist or not in current directory current_directory = os.getcwd() mlmd_file_name = "./mlmd" @@ -206,6 +210,7 @@ def run(self): if type(output) is not dict: return output dvc_config_op = output + print("dvc_config_output: ", dvc_config_op) if dvc_config_op["core.remote"] == "minio": minio_class_obj = minio_artifacts.MinioArtifacts() diff --git a/cmflib/commands/artifact/push.py b/cmflib/commands/artifact/push.py index 6efd5afb..643cbacc 100644 --- a/cmflib/commands/artifact/push.py +++ b/cmflib/commands/artifact/push.py @@ -29,6 +29,9 @@ from cmflib.utils.cmf_config import CmfConfig class CmdArtifactPush(CmdBase): + def __init__(self, args): + self.args = args + def run(self): result = "" dvc_config_op = DvcConfig.get_dvc_config() diff --git a/cmflib/commands/metadata/pull.py b/cmflib/commands/metadata/pull.py index 7c50cbf7..f1ebfc50 100644 --- a/cmflib/commands/metadata/pull.py +++ b/cmflib/commands/metadata/pull.py @@ -26,6 +26,10 @@ # This class pulls mlmd file from cmf-server class CmdMetadataPull(CmdBase): + + def __init__(self, args): + self.args = args + def run(self): cmfconfig = os.environ.get("CONFIG_FILE", ".cmfconfig") diff --git a/cmflib/commands/metadata/push.py b/cmflib/commands/metadata/push.py index bd630397..73aacc3c 100644 --- a/cmflib/commands/metadata/push.py +++ b/cmflib/commands/metadata/push.py @@ -26,6 +26,9 @@ # This class pushes mlmd file to cmf-server class CmdMetadataPush(CmdBase): + def __init__(self, args): + self.args = args + def run(self): current_directory = os.getcwd() mlmd_file_name = "./mlmd" diff --git a/cmflib/commands/repo/__init__.py b/cmflib/commands/repo/__init__.py index 6d26f58f..a92a9f15 100644 --- a/cmflib/commands/repo/__init__.py +++ b/cmflib/commands/repo/__init__.py @@ -16,10 +16,10 @@ import argparse -from cmflib.commands.repo import push +from cmflib.commands.repo import push, pull from cmflib.cli.utils import * -SUB_COMMANDS = [push] +SUB_COMMANDS = [push, pull] # This parser adds positional arguments to the main parser def add_parser(subparsers, parent_parser): diff --git a/cmflib/commands/repo/pull.py b/cmflib/commands/repo/pull.py new file mode 100644 index 00000000..aa143ef3 --- /dev/null +++ b/cmflib/commands/repo/pull.py @@ -0,0 +1,130 @@ +### +# Copyright (2024) Hewlett Packard Enterprise Development LP +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +### + +#!/usr/bin/env python3 +import argparse +import os +import subprocess +import requests + +from cmflib.cli.command import CmdBase +from cmflib.dvc_wrapper import dvc_get_config, git_get_repo, git_checkout_new_branch +from cmflib.commands.artifact.pull import CmdArtifactPull +from cmflib.commands.metadata.pull import CmdMetadataPull + + +class CmdRepoPush(CmdBase): + def __init__(self, args): + self.args = args + + def run_command(self, command, cwd=None): + process = subprocess.Popen(command, cwd=cwd, shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout, stderr = process.communicate() + return (stdout.decode('utf-8').strip() if stdout else '', + stderr.decode('utf-8').strip() if stderr else '', + process.returncode) + + def branch_exists(self, repo_own, repo_name, branch_name): + url = f"https://api.github.com/repos/{repo_own}/{repo_name}/branches/{branch_name}" + res = requests.get(url) + + if res.status_code == 200: + return True + return False + + + def run(self): + # check whether dvc is configured or not + msg = "'cmf' is not configured.\nExecute 'cmf init' command." + result = dvc_get_config() + if len(result) == 0: + return msg + + current_directory = os.getcwd() + mlmd_file_name = "./mlmd" + if self.args.file_name: + mlmd_file_name = self.args.file_name + if mlmd_file_name == "mlmd": + mlmd_file_name = "./mlmd" + current_directory = os.path.dirname(mlmd_file_name) + if not os.path.exists(mlmd_file_name): + return f"ERROR: {mlmd_file_name} doesn't exists in {current_directory} directory." + + # artifcat pull + print("artifact pull started...") + instance_of_artifact = CmdArtifactPull(self.args) + instance_of_artifact.run() + + # metadata pull + print("metadata pull started...") + instance_of_metadata = CmdMetadataPull(self.args) + instance_of_metadata.run() + + url = git_get_repo() + url = url.split("/") + # whether branch exists in git repo or not + if self.branch_exists(url[-2], url[-1], "mlmd"): + print("branch exists") + # git pull + print("git pull started...") + stdout, stderr, returncode = self.run_command("git pull cmf_origin mlmd") + # print(returncode+"1") + print(stdout) + if returncode != 0: + return f"Error pulling changes: {stderr}" + return stdout + else: + return "mlmd branch is not exists in github..." + +def add_parser(subparsers, parent_parser): + PUSH_HELP = "Pull user-generated mlmd to server to create one single mlmd file for all the pipelines." + + parser = subparsers.add_parser( + "pull", + parents=[parent_parser], + description="Pull user's mlmd to cmf-server.", + help=PUSH_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + required_arguments = parser.add_argument_group("required arguments") + + required_arguments.add_argument( + "-p", + "--pipeline_name", + required=True, + help="Specify Pipeline name.", + metavar="", + ) + + parser.add_argument( + "-f", "--file_name", help="Specify mlmd file name.", metavar="" + ) + + parser.add_argument( + "-e", + "--execution", + help="Specify Execution id.", + metavar="", + ) + + parser.add_argument( + "-a", "--artifact_name", help="Specify artifact name.", metavar="" + ) + + parser.set_defaults(func=CmdRepoPush) diff --git a/cmflib/commands/repo/push.py b/cmflib/commands/repo/push.py index 31ff80f4..ecb9ab09 100644 --- a/cmflib/commands/repo/push.py +++ b/cmflib/commands/repo/push.py @@ -22,18 +22,14 @@ from cmflib.cli.command import CmdBase from cmflib.dvc_wrapper import dvc_get_config, git_get_repo, git_checkout_new_branch +from cmflib.commands.artifact.push import CmdArtifactPush +from cmflib.commands.metadata.push import CmdMetadataPush class CmdRepoPush(CmdBase): - def branch_exists(self, repo_own, repo_name, branch_name): - url = f"https://api.github.com/repos/{repo_own}/{repo_name}/branches/{branch_name}" - res = requests.get(url) - - if res.status_code == 200: - return True - return False - - + def __init__(self, args): + self.args = args + def run_command(self, command, cwd=None): process = subprocess.Popen(command, cwd=cwd, shell=True, stdout=subprocess.PIPE, @@ -43,41 +39,112 @@ def run_command(self, command, cwd=None): stderr.decode('utf-8').strip() if stderr else '', process.returncode) + def branch_exists(self, repo_own, repo_name, branch_name): + url = f"https://api.github.com/repos/{repo_own}/{repo_name}/branches/{branch_name}" + res = requests.get(url) + + if res.status_code == 200: + return True + return False + + def git_push(self): + url = git_get_repo() + url = url.split("/") + # whether branch exists in git repo or not + if self.branch_exists(url[-2], url[-1], "mlmd"): + print("branch exists") + # pull the code + # push the code + stdout, stderr, returncode = self.run_command("git pull cmf_origin mlmd") + # print(returncode+"1") + if returncode != 0: + return f"Error pulling changes: {stderr}" + print(stdout) + + stdout, stderr, returncode = self.run_command("git push -u cmf_origin mlmd") + if returncode != 0: + return f"Error pushing changes: {stderr}" + print(stdout) + return "Successfully pushed and pulled changes!" + else: + print("branch not exists") + # push the code + stdout, stderr, returncode = self.run_command("git push -u cmf_origin mlmd") + if returncode != 0: + return f"Error pushing changes: {stderr}" + return "Successfully pushed and pulled changes!" + def run(self): + # check whether dvc is configured or not msg = "'cmf' is not configured.\nExecute 'cmf init' command." result = dvc_get_config() if len(result) == 0: return msg - else: - # checking if the current branch is cmf_origin or not - if "mlmd" in self.run_command("git branch")[0]: - url = git_get_repo() - if self.branch_exists(url.split("/")[-2], url.split("/")[-1], "mlmd"): - # pull the code - # push the code - stdout, stderr, returncode = self.run_command("git pull cmf_origin mlmd") - # print(returncode+"1") - if returncode != 0: - return f"Error pulling changes: {stderr}" + + current_directory = os.getcwd() + mlmd_file_name = "./mlmd" + + # check whether mlmd file exists or not + if self.args.file_name: + mlmd_file_name = self.args.file_name + current_directory = os.path.dirname(self.args.file_name) + + # checks if mlmd file is present in current directory or given directory + if not os.path.exists(mlmd_file_name): + return f"ERROR: {mlmd_file_name} doesn't exists in the {current_directory}." + else: + print("Executing cmf artifact push command..") + artifact_push_instance = CmdArtifactPush(self.args) + artifact_push_instance.run() - stdout, stderr, returncode = self.run_command("git push -u cmf_origin mlmd") - if returncode != 0: - return f"Error pushing changes: {stderr}" - - return "Successfully pushed and pulled changes!" - else: - # push the code - stdout, stderr, returncode = self.run_command("git push -u cmf_origin mlmd") - if returncode != 0: - return f"Error pushing changes: {stderr}" - return "Successfully pushed and pulled changes!" - else: - if self.args.file_name: - git_checkout_new_branch(self.args.file_name) - else: - git_checkout_new_branch("mlmd") - return "Checking out new branch" + # try: + # print("Executing cmf metadata push command..") + # artifact_push_instance = CmdMetadataPush(self.args) + # artifact_push_instance.run() + # except: + # return + + # print("Execution git push command..") + + # self.git_push() + return "done successfully" + + # print("") + + # else: + # return "file name is not present..." + + # file name not exists + # check whether current branch is mlmd or not + # if "mlmd" in self.run_command("git branch")[0]: + # url = git_get_repo() + # url = url.split("/") + # if self.branch_exists(url[-2], url[-1], "mlmd"): + # # pull the code + # # push the code + # stdout, stderr, returncode = self.run_command("git pull cmf_origin mlmd") + # # print(returncode+"1") + # if returncode != 0: + # return f"Error pulling changes: {stderr}" + + # stdout, stderr, returncode = self.run_command("git push -u cmf_origin mlmd") + # if returncode != 0: + # return f"Error pushing changes: {stderr}" + + # return "Successfully pushed and pulled changes!" + # else: + # # push the code + # stdout, stderr, returncode = self.run_command("git push -u cmf_origin mlmd") + # if returncode != 0: + # return f"Error pushing changes: {stderr}" + # return "Successfully pushed and pulled changes!" + # else: + # if self.args.file_name: + # git_checkout_new_branch(self.args.file_name) + # else: + # git_checkout_new_branch("mlmd") + # return "Checking out new branch" def add_parser(subparsers, parent_parser): @@ -91,8 +158,33 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) + required_arguments = parser.add_argument_group("required arguments") + + required_arguments.add_argument( + "-p", + "--pipeline_name", + required=True, + help="Specify Pipeline name.", + metavar="", + ) + parser.add_argument( "-f", "--file_name", help="Specify mlmd file name.", metavar="" ) + parser.add_argument( + "-e", + "--execution", + help="Specify Execution id.", + default=None, + metavar="", + ) + + parser.add_argument( + "-t", + "--tensorboard", + help="Specify path to tensorboard logs for the pipeline.", + metavar="" + ) + parser.set_defaults(func=CmdRepoPush) From 904f1e70fa2e304ca2231eec61a4ea5207c1bf8b Mon Sep 17 00:00:00 2001 From: AyeshaSanadi Date: Wed, 30 Oct 2024 12:09:04 +0530 Subject: [PATCH 03/41] Modified code for cmf repo push/pull command --- cmflib/cli/__init__.py | 1 - cmflib/commands/repo/pull.py | 60 ++++++++++++++++---------------- cmflib/commands/repo/push.py | 67 +++++++++--------------------------- 3 files changed, 45 insertions(+), 83 deletions(-) diff --git a/cmflib/cli/__init__.py b/cmflib/cli/__init__.py index 09419332..e3d51155 100644 --- a/cmflib/cli/__init__.py +++ b/cmflib/cli/__init__.py @@ -53,7 +53,6 @@ def main(argv=None): try: args = parse_args(argv) cmd = args.func(args) - print(cmd) msg = cmd.do_run() print(msg) except CmfParserError: diff --git a/cmflib/commands/repo/pull.py b/cmflib/commands/repo/pull.py index aa143ef3..52e9a995 100644 --- a/cmflib/commands/repo/pull.py +++ b/cmflib/commands/repo/pull.py @@ -26,7 +26,7 @@ from cmflib.commands.metadata.pull import CmdMetadataPull -class CmdRepoPush(CmdBase): +class CmdRepoPull(CmdBase): def __init__(self, args): self.args = args @@ -46,8 +46,23 @@ def branch_exists(self, repo_own, repo_name, branch_name): if res.status_code == 200: return True return False - - + + def git_pull(self): + url = git_get_repo() + url = url.split("/") + # whether branch exists in git repo or not + if self.branch_exists(url[-2], url[-1], "mlmd"): + print("branch exists") + # git pull + print("git pull started...") + stdout, stderr, returncode = self.run_command("git pull cmf_origin mlmd") + print(stdout) + if returncode != 0: + return f"Error pulling changes: {stderr}" + return stdout + else: + return "mlmd branch is not exists in github..." + def run(self): # check whether dvc is configured or not msg = "'cmf' is not configured.\nExecute 'cmf init' command." @@ -62,43 +77,26 @@ def run(self): if mlmd_file_name == "mlmd": mlmd_file_name = "./mlmd" current_directory = os.path.dirname(mlmd_file_name) + if not os.path.exists(mlmd_file_name): return f"ERROR: {mlmd_file_name} doesn't exists in {current_directory} directory." - - # artifcat pull - print("artifact pull started...") - instance_of_artifact = CmdArtifactPull(self.args) - instance_of_artifact.run() - - # metadata pull - print("metadata pull started...") - instance_of_metadata = CmdMetadataPull(self.args) - instance_of_metadata.run() - - url = git_get_repo() - url = url.split("/") - # whether branch exists in git repo or not - if self.branch_exists(url[-2], url[-1], "mlmd"): - print("branch exists") - # git pull - print("git pull started...") - stdout, stderr, returncode = self.run_command("git pull cmf_origin mlmd") - # print(returncode+"1") - print(stdout) - if returncode != 0: - return f"Error pulling changes: {stderr}" - return stdout else: - return "mlmd branch is not exists in github..." + instance_of_artifact = CmdArtifactPull(self.args) + if instance_of_artifact.run(): + print("metadata pull started...") + instance_of_metadata = CmdMetadataPull(self.args) + if instance_of_metadata.run(): + return self.git_pull() + def add_parser(subparsers, parent_parser): - PUSH_HELP = "Pull user-generated mlmd to server to create one single mlmd file for all the pipelines." + PULL_HELP = "Pull user-generated mlmd to server to create one single mlmd file for all the pipelines." parser = subparsers.add_parser( "pull", parents=[parent_parser], description="Pull user's mlmd to cmf-server.", - help=PUSH_HELP, + help=PULL_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) @@ -127,4 +125,4 @@ def add_parser(subparsers, parent_parser): "-a", "--artifact_name", help="Specify artifact name.", metavar="" ) - parser.set_defaults(func=CmdRepoPush) + parser.set_defaults(func=CmdRepoPull) diff --git a/cmflib/commands/repo/push.py b/cmflib/commands/repo/push.py index ecb9ab09..ca88d876 100644 --- a/cmflib/commands/repo/push.py +++ b/cmflib/commands/repo/push.py @@ -21,6 +21,7 @@ import requests from cmflib.cli.command import CmdBase +from cmflib import cmfquery from cmflib.dvc_wrapper import dvc_get_config, git_get_repo, git_checkout_new_branch from cmflib.commands.artifact.push import CmdArtifactPush from cmflib.commands.metadata.push import CmdMetadataPush @@ -52,7 +53,7 @@ def git_push(self): url = url.split("/") # whether branch exists in git repo or not if self.branch_exists(url[-2], url[-1], "mlmd"): - print("branch exists") + # print("branch exists") # pull the code # push the code stdout, stderr, returncode = self.run_command("git pull cmf_origin mlmd") @@ -94,58 +95,22 @@ def run(self): if not os.path.exists(mlmd_file_name): return f"ERROR: {mlmd_file_name} doesn't exists in the {current_directory}." else: + # creating cmfquery object + query = cmfquery.CmfQuery(mlmd_file_name) + # Put a check to see whether pipline exists or not + pipeline_name = self.args.pipeline_name + if not query.get_pipeline_id(pipeline_name) > 0: + return f"ERROR: Pipeline {pipeline_name} doesn't exist!!" + print("Executing cmf artifact push command..") artifact_push_instance = CmdArtifactPush(self.args) - artifact_push_instance.run() - - # try: - # print("Executing cmf metadata push command..") - # artifact_push_instance = CmdMetadataPush(self.args) - # artifact_push_instance.run() - # except: - # return - - # print("Execution git push command..") - - # self.git_push() - return "done successfully" - - # print("") - - # else: - # return "file name is not present..." - - # file name not exists - # check whether current branch is mlmd or not - # if "mlmd" in self.run_command("git branch")[0]: - # url = git_get_repo() - # url = url.split("/") - # if self.branch_exists(url[-2], url[-1], "mlmd"): - # # pull the code - # # push the code - # stdout, stderr, returncode = self.run_command("git pull cmf_origin mlmd") - # # print(returncode+"1") - # if returncode != 0: - # return f"Error pulling changes: {stderr}" - - # stdout, stderr, returncode = self.run_command("git push -u cmf_origin mlmd") - # if returncode != 0: - # return f"Error pushing changes: {stderr}" - - # return "Successfully pushed and pulled changes!" - # else: - # # push the code - # stdout, stderr, returncode = self.run_command("git push -u cmf_origin mlmd") - # if returncode != 0: - # return f"Error pushing changes: {stderr}" - # return "Successfully pushed and pulled changes!" - # else: - # if self.args.file_name: - # git_checkout_new_branch(self.args.file_name) - # else: - # git_checkout_new_branch("mlmd") - # return "Checking out new branch" - + if artifact_push_instance.run(): + print("Executing cmf metadata push command..") + metadata_push_instance = CmdMetadataPush(self.args) + if metadata_push_instance.run(): + print("Execution git push command..") + return self.git_push() + def add_parser(subparsers, parent_parser): PUSH_HELP = "Push user-generated mlmd to server to create one single mlmd file for all the pipelines." From a74dc782722b1b11dafd3dbe874d1836972c4684 Mon Sep 17 00:00:00 2001 From: First Second Date: Wed, 20 Nov 2024 21:56:43 -0800 Subject: [PATCH 04/41] adding cmf exception and success code classes --- cmflib/cli/__init__.py | 9 +++- cmflib/cmf_exception_handling.py | 63 ++++++++++++++++++++++ cmflib/cmf_success_codes.py | 17 ++++++ cmflib/commands/artifact/pull.py | 21 ++++---- cmflib/commands/artifact/push.py | 12 ++--- cmflib/commands/error_handling.py | 29 ++++++++++ cmflib/storage_backends/local_artifacts.py | 1 + cmflib/storage_backends/minio_artifacts.py | 4 +- 8 files changed, 138 insertions(+), 18 deletions(-) create mode 100644 cmflib/cmf_exception_handling.py create mode 100644 cmflib/cmf_success_codes.py create mode 100644 cmflib/commands/error_handling.py diff --git a/cmflib/cli/__init__.py b/cmflib/cli/__init__.py index 58e21183..fe4768ba 100644 --- a/cmflib/cli/__init__.py +++ b/cmflib/cli/__init__.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. ### - +from cmflib.cmf_exception_handling import CmfException class CmfParserError(Exception): """Base class for CLI parser errors.""" @@ -36,6 +36,7 @@ def parse_args(argv=None): parser = get_main_parser() args = parser.parse_args(argv) + args.parser = parser return args @@ -55,9 +56,13 @@ def main(argv=None): cmd = args.func(args) msg = cmd.do_run() print(msg) + except CmfException as e: + print(e.handle(),{"status_code": e.return_code}) except CmfParserError: - pass + pass except KeyboardInterrupt: print("Interrupted by the user") except Exception as e: print(e) + + diff --git a/cmflib/cmf_exception_handling.py b/cmflib/cmf_exception_handling.py new file mode 100644 index 00000000..baa2fbe2 --- /dev/null +++ b/cmflib/cmf_exception_handling.py @@ -0,0 +1,63 @@ +"""Exceptions raised by the CMF.""" + +class CmfException(Exception): + """Base class for all dvc exceptions.""" + + def __init__(self, return_code=None, *args): + self.return_code = return_code + super().__init__(*args) + +class MissingRequiredArgument(CmfException): + def __init__(self,pipeline_name,return_code=1): + self.pipeline_name = pipeline_name + super().__init__(return_code) + + def handle(self): + return f"Pipeline_name {self.pipeline_name} doesnt exist" + +class FileNotFound(CmfException): + def __init__(self,file_name,return_code=2): + self.file_name =file_name + super().__init__(return_code) + + def handle(self): + return f"File Not Found: {self.file_name}" + +class BucketNotFound(CmfException): + def __init__(self,return_code=9): + super().__init__(return_code) + + def handle(self): + return f"Bucket doesnt exist" + +class ExecutionsNotFound(CmfException): + def __init__(self, return_code=6): + super().__init__(return_code) + + def handle(self): + return f"Executions not found" + +class ArtifactNotFound(CmfException): + def __init__(self,artifact_name, return_code=7): + self.artifact_name = artifact_name + super().__init__(return_code) + + def handle(self): + return f"{self.artifact_name} not found" + + +class ObjectDownloadSuccess(CmfException): + def __init__(self,temp_object_name,temp_download_loc, return_code=6): + self.temp_object_name = temp_object_name + self.temp_download_loc = temp_download_loc + super().__init__(return_code) + + def handle(self): + return f"object {self.temp_object_name} downloaded at {self.temp_download_loc}." + +class Minios3ServerInactive(CmfException): + def __init__(self,return_code=8): + super().__init__(return_code) + + def handle(self): + return f"MinioS3 server failed to start!!!" \ No newline at end of file diff --git a/cmflib/cmf_success_codes.py b/cmflib/cmf_success_codes.py new file mode 100644 index 00000000..b38879ab --- /dev/null +++ b/cmflib/cmf_success_codes.py @@ -0,0 +1,17 @@ +class StatusCodes: + codes = { + 0: "Operation completed successfully.", + 10: "File '{filename}' downloaded successfully.", + 20: "Artifact '{artifact_name}' processed successfully.", + 1: "Generic failure.", + 11: "Failed to download file '{filename}'.", + 21: "Failed to process artifact '{artifact_name}'.", + } + + @staticmethod + def get_message(code, **kwargs): + """ + Retrieve the message for a given status code, formatting it with dynamic data. + """ + template = StatusCodes.codes.get(code, "Unknown status code.") + return template.format(**kwargs) \ No newline at end of file diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index 0cbdded0..d57d530f 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -28,7 +28,7 @@ ) from cmflib.cli.command import CmdBase from cmflib.utils.dvc_config import DvcConfig - +from cmflib.cmf_exception_handling import MissingRequiredArgument, Minios3ServerInactive, FileNotFound, ExecutionsNotFound, ArtifactNotFound class CmdArtifactPull(CmdBase): @@ -166,10 +166,13 @@ def run(self): if mlmd_file_name == "mlmd": mlmd_file_name = "./mlmd" current_directory = os.path.dirname(mlmd_file_name) + if self.args.pipeline_name == "": + raise MissingRequiredArgument if not os.path.exists(mlmd_file_name): - return f"ERROR: {mlmd_file_name} doesn't exists in {current_directory} directory." + raise FileNotFound(mlmd_file_name) query = cmfquery.CmfQuery(mlmd_file_name) - + if not query.get_pipeline_id(self.args.pipeline_name) > 0: + raise MissingRequiredArgument(self.args.pipeline_name) # getting all pipeline stages[i.e Prepare, Featurize, Train and Evaluate] stages = query.get_pipeline_stages(self.args.pipeline_name) executions = [] @@ -191,7 +194,7 @@ def run(self): # created dictionary name_url_dict = {} if len(identifiers) == 0: # check if there are no executions - return "No executions found." + raise ExecutionsNotFound() for identifier in identifiers: get_artifacts = query.get_all_artifacts_for_execution( identifier @@ -214,7 +217,7 @@ def run(self): # output[0] = name # output[1] = url if output is None: - print(f"{self.args.artifact_name} doesn't exist.") + raise ArtifactNotFound(self.args.artifact_name) else: minio_args = self.extract_repo_args("minio", output[0], output[1], current_directory) stmt = minio_class_obj.download_artifacts( @@ -246,7 +249,7 @@ def run(self): # output[0] = name # output[1] = url if output is None: - print(f"{self.args.artifact_name} doesn't exist.") + raise ArtifactNotFound(self.args.artifact_name) else: local_args = self.extract_repo_args("local", output[0], output[1], current_directory) stmt = local_class_obj.download_artifacts( @@ -273,7 +276,7 @@ def run(self): # output[0] = name # output[1] = url if output is None: - print(f"{self.args.artifact_name} doesn't exist.") + raise ArtifactNotFound(self.args.artifact_name) else: args = self.extract_repo_args("ssh", output[0], output[1], current_directory) stmt = sshremote_class_obj.download_artifacts( @@ -326,7 +329,7 @@ def run(self): # output[0] = name # output[1] = url if output is None: - print(f"{self.args.artifact_name} doesn't exist.") + raise ArtifactNotFound(self.args.artifact_name) else: args = self.extract_repo_args("osdf", output[0], output[1], current_directory) stmt = osdfremote_class_obj.download_artifacts( @@ -360,7 +363,7 @@ def run(self): # output[0] = name # output[1] = url if output is None: - print(f"{self.args.artifact_name} doesn't exist.") + raise ArtifactNotFound(self.args.artifact_name) else: args = self.extract_repo_args("amazons3", output[0], output[1], current_directory) if args[0] and args[1] and args[2]: diff --git a/cmflib/commands/artifact/push.py b/cmflib/commands/artifact/push.py index 7179c78c..8fcd6b83 100644 --- a/cmflib/commands/artifact/push.py +++ b/cmflib/commands/artifact/push.py @@ -23,11 +23,11 @@ from cmflib.cli.command import CmdBase from cmflib.cli.utils import check_minio_server from cmflib.utils.helper_functions import generate_osdf_token -from cmflib.utils.helper_functions import is_url from cmflib.utils.dvc_config import DvcConfig from cmflib.dvc_wrapper import dvc_push from cmflib.dvc_wrapper import dvc_add_attribute from cmflib.utils.cmf_config import CmfConfig +from cmflib.cmf_exception_handling import MissingRequiredArgument, Minios3ServerInactive, FileNotFound, ExecutionsNotFound class CmdArtifactPush(CmdBase): def run(self): @@ -38,7 +38,7 @@ def run(self): cmf_config=CmfConfig.read_config(cmf_config_file) out_msg = check_minio_server(dvc_config_op) if dvc_config_op["core.remote"] == "minio" and out_msg != "SUCCESS": - return "MinioS3 server failed to start!!!" + raise Minios3ServerInactive() if dvc_config_op["core.remote"] == "osdf": #print("key_id="+cmf_config["osdf-key_id"]) dynamic_password = generate_osdf_token(cmf_config["osdf-key_id"],cmf_config["osdf-key_path"],cmf_config["osdf-key_issuer"]) @@ -58,15 +58,14 @@ def run(self): mlmd_file_name = "./mlmd" current_directory = os.path.dirname(mlmd_file_name) if not os.path.exists(mlmd_file_name): - return f"ERROR: {mlmd_file_name} doesn't exists in {current_directory} directory." - + raise FileNotFound(mlmd_file_name) # creating cmfquery object query = cmfquery.CmfQuery(mlmd_file_name) # Put a check to see whether pipline exists or not pipeline_name = self.args.pipeline_name if not query.get_pipeline_id(pipeline_name) > 0: - return f"ERROR: Pipeline {pipeline_name} doesn't exist!!" + raise MissingRequiredArgument(pipeline_name) stages = query.get_pipeline_stages(self.args.pipeline_name) executions = [] @@ -86,7 +85,7 @@ def run(self): names = [] if len(identifiers) == 0: # check if there are no executions - return "No executions found." + raise ExecutionsNotFound() for identifier in identifiers: artifacts = query.get_all_artifacts_for_execution( identifier @@ -113,6 +112,7 @@ def run(self): pass #print("file_set = ", final_list) result = dvc_push(list(final_list)) + print(result,"result") return result def add_parser(subparsers, parent_parser): diff --git a/cmflib/commands/error_handling.py b/cmflib/commands/error_handling.py new file mode 100644 index 00000000..0bf085d2 --- /dev/null +++ b/cmflib/commands/error_handling.py @@ -0,0 +1,29 @@ +import sys +ERROR_CODES = { + 0: "Success", + 1: "Missing required argument", + 2: "File not found", + 3: "Invalid argument value", + 4: "Operation failed", + 5: "Permission denied", + 6: "No executions found.", + 7: "Pipeline_name doesnt exist", + 8: "MinioS3 server failed to start!!!", + 9: "Bucket doesn't exists", + 10:"object {temp_object_name} downloaded at {temp_download_loc}.", + 11:"object {object_name} downloaded at {download_loc}.", + 12: "Unknown error", +} + +def get_error_message(code): + return ERROR_CODES.get(code, ERROR_CODES[12]) + +def handle_error(return_code, **kwargs): + error_message = get_error_message(return_code) + if return_code==11: + temp_object_name=kwargs["temp_object_name"] + temp_download_loc=kwargs["temp_download_loc"] + error_message=f"object {temp_object_name} downloaded at {temp_download_loc}." + + print(f"Error: {error_message}") + sys.exit(return_code) \ No newline at end of file diff --git a/cmflib/storage_backends/local_artifacts.py b/cmflib/storage_backends/local_artifacts.py index 8626c4cb..a1cc8b3f 100644 --- a/cmflib/storage_backends/local_artifacts.py +++ b/cmflib/storage_backends/local_artifacts.py @@ -16,6 +16,7 @@ import os from dvc.api import DVCFileSystem +from cmflib.commands.error_handling import handle_error class LocalArtifacts: def download_artifacts( diff --git a/cmflib/storage_backends/minio_artifacts.py b/cmflib/storage_backends/minio_artifacts.py index fa2ca7ad..1a3ebab3 100644 --- a/cmflib/storage_backends/minio_artifacts.py +++ b/cmflib/storage_backends/minio_artifacts.py @@ -17,6 +17,7 @@ import os from minio import Minio from minio.error import S3Error +from cmflib.commands.error_handling import handle_error class MinioArtifacts: @@ -37,7 +38,8 @@ def download_artifacts( ) found = client.bucket_exists(bucket_name) if not found: - return "Bucket doesn't exists" + #return "Bucket doesn't exists" + handle_error(return_code = 9) response = "" From b83956f05f4a5c9d397b684cf563ec8d03e2f3b9 Mon Sep 17 00:00:00 2001 From: First Second Date: Thu, 21 Nov 2024 23:04:44 -0800 Subject: [PATCH 05/41] check applied to minios3to check files download failed or assed --- cmflib/cmf_success_codes.py | 17 ++++++++----- cmflib/commands/artifact/pull.py | 15 ++++++++---- cmflib/storage_backends/local_artifacts.py | 9 ++++--- cmflib/storage_backends/minio_artifacts.py | 28 ++++++++++++++++------ 4 files changed, 48 insertions(+), 21 deletions(-) diff --git a/cmflib/cmf_success_codes.py b/cmflib/cmf_success_codes.py index b38879ab..ee3a5845 100644 --- a/cmflib/cmf_success_codes.py +++ b/cmflib/cmf_success_codes.py @@ -1,17 +1,22 @@ class StatusCodes: - codes = { + def __init__(self): + self.codes = { 0: "Operation completed successfully.", + 2: "object {object_name} downloaded at {download_loc}.", 10: "File '{filename}' downloaded successfully.", 20: "Artifact '{artifact_name}' processed successfully.", 1: "Generic failure.", 11: "Failed to download file '{filename}'.", 21: "Failed to process artifact '{artifact_name}'.", - } + 22: "object {object_name} is not downloaded." + } - @staticmethod - def get_message(code, **kwargs): + + def get_message(self,code, **kwargs): """ Retrieve the message for a given status code, formatting it with dynamic data. """ - template = StatusCodes.codes.get(code, "Unknown status code.") - return template.format(**kwargs) \ No newline at end of file + print(code,"inside status code") + template = self.codes.get(code, "Unknown status code.") + print(type(template)) + return code, template.format(**kwargs) \ No newline at end of file diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index d57d530f..482a657a 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -227,21 +227,27 @@ def run(self): minio_args[1], # object_name minio_args[2], # path_name ) - print(stmt) + return stmt else: + count_download_started = 0 + count_download_completed = 0 for name, url in name_url_dict.items(): if not isinstance(url, str): continue minio_args = self.extract_repo_args("minio", name, url, current_directory) - stmt = minio_class_obj.download_artifacts( + count_download_started += 1 + return_code, stmt = minio_class_obj.download_artifacts( dvc_config_op, current_directory, minio_args[0], # bucket_name minio_args[1], # object_name minio_args[2], # path_name ) - print(stmt) - return "Done" + if return_code == 2: + count_download_completed += 1 + + temp = f"files downloaded = {count_download_completed }. Files failed to download = {count_download_started - count_download_completed}" + return temp elif dvc_config_op["core.remote"] == "local-storage": local_class_obj = local_artifacts.LocalArtifacts() if self.args.artifact_name: @@ -357,7 +363,6 @@ def run(self): return "Done" elif dvc_config_op["core.remote"] == "amazons3": amazonS3_class_obj = amazonS3_artifacts.AmazonS3Artifacts() - #print(self.args.artifact_name,"artifact name") if self.args.artifact_name: output = self.search_artifact(name_url_dict) # output[0] = name diff --git a/cmflib/storage_backends/local_artifacts.py b/cmflib/storage_backends/local_artifacts.py index a1cc8b3f..53eda128 100644 --- a/cmflib/storage_backends/local_artifacts.py +++ b/cmflib/storage_backends/local_artifacts.py @@ -16,7 +16,8 @@ import os from dvc.api import DVCFileSystem -from cmflib.commands.error_handling import handle_error +from cmflib.cmf_exception_handling import CmfException +from cmflib.cmf_success_codes import StatusCodes class LocalArtifacts: def download_artifacts( @@ -39,6 +40,7 @@ def download_artifacts( dir_path, _ = download_loc.rsplit("/", 1) if dir_path != "": os.makedirs(dir_path, mode=0o777, exist_ok=True) # creating subfolders if needed + status_code = StatusCodes() response = "" @@ -83,8 +85,9 @@ def download_artifacts( else: response = fs.get_file(object_name, download_loc) if response == None: # get_file() returns none when file gets downloaded. - stmt = f"object {object_name} downloaded at {download_loc}." - return stmt + return_code, stmt = status_code.get_message(int(2),object_name=object_name,download_loc=download_loc) + + return return_code,stmt except TypeError as exception: return exception except Exception as exception: diff --git a/cmflib/storage_backends/minio_artifacts.py b/cmflib/storage_backends/minio_artifacts.py index 1a3ebab3..e9d0efee 100644 --- a/cmflib/storage_backends/minio_artifacts.py +++ b/cmflib/storage_backends/minio_artifacts.py @@ -18,7 +18,8 @@ from minio import Minio from minio.error import S3Error from cmflib.commands.error_handling import handle_error - +from cmflib.cmf_success_codes import StatusCodes +from cmflib.cmf_exception_handling import BucketNotFound class MinioArtifacts: def download_artifacts( @@ -38,9 +39,9 @@ def download_artifacts( ) found = client.bucket_exists(bucket_name) if not found: - #return "Bucket doesn't exists" - handle_error(return_code = 9) + raise BucketNotFound() + status_code = StatusCodes() response = "" """" @@ -48,7 +49,9 @@ def download_artifacts( we download .dir object with 'temp_dir' and remove this after all the files from this .dir object is downloaded. """ + #print("inside download arti") if object_name.endswith('.dir'): + print("inside if loop") # in case of .dir, download_loc is a absolute path for a folder os.makedirs(download_loc, mode=0o777, exist_ok=True) @@ -72,6 +75,7 @@ def download_artifacts( repo_path = object_name.split("/") repo_path = repo_path[:len(repo_path)-2] repo_path = "/".join(repo_path) + count_failed = 0 for file_info in tracked_files: relpath = file_info['relpath'] md5_val = file_info['md5'] @@ -85,16 +89,26 @@ def download_artifacts( if obj: print(f"object {temp_object_name} downloaded at {temp_download_loc}.") else: + count_failed += 1 print(f"object {temp_object_name} is not downloaded.") + if count_failed == 0: # if count_failed is 0 it means all the objects of directory are downloaded + response = True + else: # if count_failed is greater than 0 it means some artifacts or all are not downloaded + response = False else: + print("inside else loop") response = client.fget_object(bucket_name, object_name, download_loc) if response: - stmt = f"object {object_name} downloaded at {download_loc}." - return stmt + print("insdie if reponse ") + #stmt = f"object {object_name} downloaded at {download_loc}." + return_code, stmt = status_code.get_message(int(2),object_name=object_name,download_loc=download_loc) + return return_code, stmt else: - return f"object {object_name} is not downloaded." - + print("insdie else reponse ") + return_code, stmt = status_code.get_message(int(22),object_name=object_name) + return return_code, stmt except TypeError as exception: + #print("inside ") return exception except S3Error as exception: return exception From e03d67516c487bf2d022d28e94b728015af6d02b Mon Sep 17 00:00:00 2001 From: abhinav chobey Date: Wed, 4 Dec 2024 02:46:11 -0800 Subject: [PATCH 06/41] adding changes to metadata push --- cmflib/commands/metadata/push.py | 107 +++++++++++++++++-------------- 1 file changed, 59 insertions(+), 48 deletions(-) diff --git a/cmflib/commands/metadata/push.py b/cmflib/commands/metadata/push.py index bd630397..64983bab 100644 --- a/cmflib/commands/metadata/push.py +++ b/cmflib/commands/metadata/push.py @@ -23,7 +23,8 @@ from cmflib.cli.utils import find_root from cmflib.server_interface import server_interface from cmflib.utils.cmf_config import CmfConfig - +from cmflib.cmf_exception_handling import MlmdAndTensorboardPushSuccess, MlmdAndTensorboardPushFailure, PipelineNameNotFound, MlmdFilePushedSuccess, ExecutionsAlreadyExists +from cmflib.cmf_exception_handling import FileNotFound, ExecutionIDNotFound, PipelineNameNotFound, MlmdFilePulledSuccess, ExecutionsAlreadyExists, UpdateCmfVersion, CmfServerNotAvailable, InternalServerError, CmfNotConfigured # This class pushes mlmd file to cmf-server class CmdMetadataPush(CmdBase): def run(self): @@ -37,7 +38,7 @@ def run(self): # checks if mlmd file is present in current directory or given directory if not os.path.exists(mlmd_file_name): - return f"ERROR: {mlmd_file_name} doesn't exists in the {current_directory}." + raise FileNotFound(mlmd_file_name) query = cmfquery.CmfQuery(mlmd_file_name) # print(json.dumps(json.loads(json_payload), indent=4, sort_keys=True)) @@ -52,17 +53,18 @@ def run(self): # in case, there is no .cmfconfig file if output.find("'cmf' is not configured") != -1: - return output + raise CmfNotConfigured(output) config_file_path = os.path.join(output, cmfconfig) attr_dict = CmfConfig.read_config(config_file_path) url = attr_dict.get("cmf-server-ip", "http://127.0.0.1:80") - print("metadata push started") - print("........................................") + # Checks if pipeline name exists if self.args.pipeline_name in query.get_pipeline_names(): + print("metadata push started") + print("........................................") # converts mlmd file to json format json_payload = query.dumptojson(self.args.pipeline_name, None) # checks if execution_id is given by user @@ -80,59 +82,68 @@ def run(self): ) break if execution_flag == 0: - return "Given execution is not found in mlmd." + raise ExecutionIDNotFound(exec_id) else: exec_id = None response = server_interface.call_mlmd_push(json_payload, url, exec_id, self.args.pipeline_name) status_code = response.status_code - if status_code == 200 and response.json()['status']=="success": - print("mlmd is successfully pushed.") - elif status_code==200 and response.json()["status"]=="exists": - print("Executions already exists.") - elif status_code==422 and response.json()["status"]=="version_update": - return "ERROR: You need to update cmf to the latest version. Unable to push metadata file." - elif status_code == 404: - return "ERROR: cmf-server is not available." - elif status_code == 500: - return "ERROR: Internal server error." - else: - return "ERROR: Status Code = {status_code}. Unable to push mlmd." - - if self.args.tensorboard: + if status_code == 200: + output = "" + if response.json()['status']=="success": + output = "mlmd is successfully pushed." + if response.json()["status"]=="exists": + output = "Executions already exists." + + if self.args.tensorboard: + print(output) # /tensorboard api call is done only if mlmd push is successfully completed # tensorboard parameter is passed - print("......................................") - print("tensorboard logs upload started!!") - print("......................................") - - # check if the path provided is for a file - if os.path.isfile(self.args.tensorboard): - file_name = os.path.basename(self.args.tensorboard) - tresponse = server_interface.call_tensorboard(url, self.args.pipeline_name, file_name, self.args.tensorboard) - tstatus_code = tresponse.status_code - if tstatus_code == 200: - return "tensorboard logs: file {file_name} pushed successfully" + print("......................................") + print("tensorboard logs upload started!!") + print("......................................") + + # check if the path provided is for a file + if os.path.isfile(self.args.tensorboard): + file_name = os.path.basename(self.args.tensorboard) + tresponse = server_interface.call_tensorboard(url, self.args.pipeline_name, file_name, self.args.tensorboard) + tstatus_code = tresponse.status_code + if tstatus_code == 200: + # give status code as success + return MlmdAndTensorboardPushSuccess(file_name) + else: + # give status code as failure + return MlmdAndTensorboardPushFailure(file_name,tresponse.text) + # If path provided is a directory + elif os.path.isdir(self.args.tensorboard): + # Recursively push all files and subdirectories + for root, dirs, files in os.walk(self.args.tensorboard): + for file_name in files: + file_path = os.path.join(root, file_name) + relative_path = os.path.relpath(file_path, self.args.tensorboard) + tresponse = server_interface.call_tensorboard(url, self.args.pipeline_name, relative_path, file_path) + if tresponse.status_code == 200: + print(f"tensorboard logs: File {file_name} uploaded successfully.") + else: + # give status as failure + return MlmdAndTensorboardPushFailure(file_name,tresponse.text) + return MlmdAndTensorboardPushSuccess else: - return "ERROR: Failed to upload file {file_name}. Server response: {response.text}" - # If path provided is a directory - elif os.path.isdir(self.args.tensorboard): - # Recursively push all files and subdirectories - for root, dirs, files in os.walk(self.args.tensorboard): - for file_name in files: - file_path = os.path.join(root, file_name) - relative_path = os.path.relpath(file_path, self.args.tensorboard) - tresponse = server_interface.call_tensorboard(url, self.args.pipeline_name, relative_path, file_path) - if tresponse.status_code == 200: - print(f"tensorboard logs: File {file_name} uploaded successfully.") - else: - return f"ERROR: Failed to upload file {file_name}. Server response: {tresponse.text}" - return f"tensorboard logs: {self.args.tensorboard} uploaded successfully!!" + return "ERROR: Invalid data path. Provide valid file/folder path for tensorboard logs!!" else: - return "ERROR: Invalid data path. Provide valid file/folder path for tensorboard logs!!" + if response.json()['status']=="success": + return MlmdFilePushedSuccess + if response.json()["status"]=="exists": + return ExecutionsAlreadyExists + elif status_code==422 and response.json()["status"]=="version_update": + raise UpdateCmfVersion + elif status_code == 404: + raise CmfServerNotAvailable + elif status_code == 500: + raise InternalServerError else: - return "SUCCESS!!" + return "ERROR: Status Code = {status_code}. Unable to push mlmd." else: - return "Pipeline name " + self.args.pipeline_name + " doesn't exists." + raise PipelineNameNotFound(self.args.pipeline_name) def add_parser(subparsers, parent_parser): From cbdb4aeda47b341e99355e56a56441d2f344a963 Mon Sep 17 00:00:00 2001 From: abhinav chobey Date: Wed, 4 Dec 2024 03:47:36 -0800 Subject: [PATCH 07/41] updated init, artifact, metadata, storage_backend with exception handling --- cmflib/cli/__init__.py | 8 +- cmflib/cmf_exception_handling.py | 236 ++++++++++++++++-- cmflib/cmf_success_codes.py | 22 -- cmflib/commands/artifact/list.py | 10 +- cmflib/commands/artifact/pull.py | 199 +++++++++++---- cmflib/commands/artifact/push.py | 5 +- cmflib/commands/init/amazonS3.py | 9 +- cmflib/commands/init/local.py | 9 +- cmflib/commands/init/minioS3.py | 10 +- cmflib/commands/init/show.py | 7 +- cmflib/commands/init/sshremote.py | 9 +- cmflib/commands/metadata/pull.py | 54 ++-- cmflib/commands/metadata/push_old.py | 178 +++++++++++++ cmflib/storage_backends/amazonS3_artifacts.py | 43 +++- cmflib/storage_backends/local_artifacts.py | 54 ++-- cmflib/storage_backends/minio_artifacts.py | 64 +++-- cmflib/storage_backends/osdf_artifacts.py | 14 +- .../storage_backends/sshremote_artifacts.py | 57 ++++- server/app/get_data.py | 8 +- server/app/main.py | 7 +- 20 files changed, 794 insertions(+), 209 deletions(-) delete mode 100644 cmflib/cmf_success_codes.py create mode 100644 cmflib/commands/metadata/push_old.py diff --git a/cmflib/cli/__init__.py b/cmflib/cli/__init__.py index fe4768ba..eeafc798 100644 --- a/cmflib/cli/__init__.py +++ b/cmflib/cli/__init__.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. ### -from cmflib.cmf_exception_handling import CmfException +from cmflib.cmf_exception_handling import CmfResponse class CmfParserError(Exception): """Base class for CLI parser errors.""" @@ -55,9 +55,9 @@ def main(argv=None): args = parse_args(argv) cmd = args.func(args) msg = cmd.do_run() - print(msg) - except CmfException as e: - print(e.handle(),{"status_code": e.return_code}) + print(msg.handle()) + except CmfResponse as e: + print(e.handle()) except CmfParserError: pass except KeyboardInterrupt: diff --git a/cmflib/cmf_exception_handling.py b/cmflib/cmf_exception_handling.py index baa2fbe2..c4402473 100644 --- a/cmflib/cmf_exception_handling.py +++ b/cmflib/cmf_exception_handling.py @@ -1,63 +1,251 @@ """Exceptions raised by the CMF.""" -class CmfException(Exception): - """Base class for all dvc exceptions.""" +class CmfResponse(Exception): + """Base class for all cmf exceptions.""" - def __init__(self, return_code=None, *args): + def __init__(self, return_code=None, status="failure", *args): self.return_code = return_code + self.status = status super().__init__(*args) -class MissingRequiredArgument(CmfException): - def __init__(self,pipeline_name,return_code=1): +class CmfFailure(CmfResponse): + def __init__(self, return_code=None, *args): + super().__init__(return_code, status="failure", *args) + + +# Subclass for Success Cases +class CmfSuccess(CmfResponse): + def __init__(self, return_code=None, *args): + super().__init__(return_code, status="success", *args) + + +class PipelineNameNotFound(CmfFailure): + def __init__(self,pipeline_name,return_code=101): self.pipeline_name = pipeline_name super().__init__(return_code) def handle(self): - return f"Pipeline_name {self.pipeline_name} doesnt exist" + return f"Pipeline_name {self.pipeline_name} doesn't exist" + -class FileNotFound(CmfException): - def __init__(self,file_name,return_code=2): +class FileNotFound(CmfFailure): + def __init__(self,file_name,return_code=102): self.file_name =file_name super().__init__(return_code) def handle(self): return f"File Not Found: {self.file_name}" -class BucketNotFound(CmfException): - def __init__(self,return_code=9): +class BucketNotFound(CmfFailure): + def __init__(self,return_code=103): super().__init__(return_code) def handle(self): return f"Bucket doesnt exist" -class ExecutionsNotFound(CmfException): - def __init__(self, return_code=6): +class ExecutionsAlreadyExists(CmfSuccess): + def __init__(self, return_code=201): + super().__init__(return_code) + + @staticmethod + def handle(): + return "Executions already exists." + + +class ExecutionsNotFound(CmfFailure): + def __init__(self, return_code=105): super().__init__(return_code) def handle(self): return f"Executions not found" - -class ArtifactNotFound(CmfException): - def __init__(self,artifact_name, return_code=7): + +class ExecutionIDNotFound(CmfFailure): + def __init__(self,exec_id, return_code=106): + self.exec_id = exec_id + super().__init__(return_code) + + def handle(self): + return f"Error: Execution id {self.exec_id} is not present in mlmd." + +class ArtifactNotFound(CmfFailure): + def __init__(self,artifact_name, return_code=107): self.artifact_name = artifact_name super().__init__(return_code) def handle(self): - return f"{self.artifact_name} not found" + return f"Artifact {self.artifact_name} not found" + + +class ObjectDownloadSuccess(CmfSuccess): + def __init__(self,object_name,download_loc, return_code=202): + self.object_name = object_name + self.download_loc = download_loc + super().__init__(return_code) + + def handle(self): + return f"object {self.object_name} downloaded at {self.download_loc}." + +class ObjectDownloadFailure(CmfFailure): + def __init__(self,object_name, return_code=108): + self.object_name = object_name + super().__init__(return_code) + + def handle(self): + return f"object {self.object_name} is not downloaded." + +class BatchDownloadFailure(CmfFailure): + def __init__(self,files_downloaded, Files_failed_to_download, return_code=109): + self.files_downloaded = files_downloaded + self.Files_failed_to_download = Files_failed_to_download + super().__init__(return_code) + + def handle(self): + return f"Number of files downloaded = {self.files_downloaded }. Files failed to download = {self.Files_failed_to_download}" + +class BatchDownloadSuccess(CmfSuccess): + def __init__(self,files_downloaded, return_code=203): + self.files_downloaded = files_downloaded + super().__init__(return_code) + + def handle(self): + return f"Number of files downloaded = {self.files_downloaded }." + +class Minios3ServerInactive(CmfFailure): + def __init__(self,return_code=109): + super().__init__(return_code) + + def handle(self): + return f"MinioS3 server failed to start!!!" + +class CmfNotConfigured(CmfFailure): + def __init__(self,message, return_code=111): + self.message = message + super().__init__(return_code) + + def handle(self): + return self.message + +class MlmdNotFoundOnServer(CmfFailure): + def __init__(self, return_code=113): + super().__init__(return_code) + + def handle(self): + return "mlmd file not available on cmf-server." + +class MlmdFilePulledSuccess(CmfSuccess): + def __init__(self,full_path_to_dump, return_code=204): + self.full_path_to_dump = full_path_to_dump + super().__init__(return_code) + + def handle(self): + return f"SUCCESS: {self.full_path_to_dump} is successfully pulled." + +class MlmdFilePushedSuccess(CmfSuccess): + def __init__(self, return_code=205): + super().__init__(return_code) + + @staticmethod + def handle(): + return f"mlmd is successfully pushed." -class ObjectDownloadSuccess(CmfException): - def __init__(self,temp_object_name,temp_download_loc, return_code=6): - self.temp_object_name = temp_object_name - self.temp_download_loc = temp_download_loc + +class UpdateCmfVersion(CmfFailure): + def __init__(self, return_code=110): super().__init__(return_code) def handle(self): - return f"object {self.temp_object_name} downloaded at {self.temp_download_loc}." + return "ERROR: You need to update cmf to the latest version. Unable to push metadata file." + +class MlmdAndTensorboardPushSuccess(CmfSuccess): + def __init__(self, tensorboard_file_name:str = "All", return_code=207): + self.tensorboard_file_name = tensorboard_file_name + super().__init__(return_code) + + def handle(self): + if self.tensorboard_file_name == "All": + return f"tensorboard logs: files pushed successfully" + return f"tensorboard logs: file {self.tensorboard_file_push_message} pushed successfully" -class Minios3ServerInactive(CmfException): - def __init__(self,return_code=8): +class MlmdAndTensorboardPushFailure(CmfFailure): + def __init__(self,tensorboard_file_name,response_text, return_code=111): + self.tensorboard_file_name = tensorboard_file_name + self.response_text = response_text + super().__init__(return_code) + + def handle(self): + return f"ERROR: Failed to upload file {self.file_name}. Server response: {self.response_text}" + + +class ArgumentNotProvided(CmfFailure): + def __init__(self, return_code=112): + super().__init__(return_code) + + def handle(self): + return "ERROR: Provide user, password and uri for neo4j initialization." + +class CmfInitFailed(CmfFailure): + def __init__(self, return_code=113): + super().__init__(return_code) + + def handle(self): + return "cmf init failed." + +class CmfInitComplete(CmfSuccess): + def __init__(self, return_code=208): + super().__init__(return_code) + + def handle(self): + return "cmf init complete." + +class CmfInitShow(CmfSuccess): + def __init__(self,result, attr_str, return_code=209): + self.result = result + self.attr_str = attr_str + super().__init__(return_code) + + def handle(self): + return f"{self.result}\n{self.attr_str}" + +class CmfServerNotAvailable(CmfFailure): + def __init__(self, return_code=114): + super().__init__(return_code) + + def handle(self): + return "ERROR: cmf-server is not available." + +class InternalServerError(CmfFailure): + def __init__(self, return_code=115): + super().__init__(return_code) + + def handle(self): + return "ERROR: Internal server error." + +class MlmdFilePulledFailure(CmfFailure): + def __init__(self, return_code=204): + super().__init__(return_code) + + def handle(self): + return "ERROR: Unable to pull mlmd." + +class CurrentDirectoryNotfound(CmfFailure): + def __init__(self,current_dir, return_code=116): + self.current_dir = current_dir + super().__init__(return_code) + + def handle(self): + return f"{self.current_dir} doesn't exists." + +class FileNameNotfound(CmfFailure): + def __init__(self, return_code=116): + super().__init__(return_code) + + def handle(self): + return "Provide path with file name." + +class NoDataFoundOsdf(CmfFailure): + def __init__(self, return_code=117): super().__init__(return_code) def handle(self): - return f"MinioS3 server failed to start!!!" \ No newline at end of file + return "No data received from the server." diff --git a/cmflib/cmf_success_codes.py b/cmflib/cmf_success_codes.py deleted file mode 100644 index ee3a5845..00000000 --- a/cmflib/cmf_success_codes.py +++ /dev/null @@ -1,22 +0,0 @@ -class StatusCodes: - def __init__(self): - self.codes = { - 0: "Operation completed successfully.", - 2: "object {object_name} downloaded at {download_loc}.", - 10: "File '{filename}' downloaded successfully.", - 20: "Artifact '{artifact_name}' processed successfully.", - 1: "Generic failure.", - 11: "Failed to download file '{filename}'.", - 21: "Failed to process artifact '{artifact_name}'.", - 22: "object {object_name} is not downloaded." - } - - - def get_message(self,code, **kwargs): - """ - Retrieve the message for a given status code, formatting it with dynamic data. - """ - print(code,"inside status code") - template = self.codes.get(code, "Unknown status code.") - print(type(template)) - return code, template.format(**kwargs) \ No newline at end of file diff --git a/cmflib/commands/artifact/list.py b/cmflib/commands/artifact/list.py index 536e66f1..2b9c7c81 100644 --- a/cmflib/commands/artifact/list.py +++ b/cmflib/commands/artifact/list.py @@ -20,6 +20,7 @@ from cmflib.cli.command import CmdBase from cmflib import cmfquery +from cmflib.cmf_exception_handling import PipelineNameNotFound, FileNotFound, ArtifactNotFound class CmdArtifactsList(CmdBase): def update_dataframe(self, df): @@ -42,13 +43,15 @@ def run(self): current_directory = os.getcwd() # default path for mlmd file name mlmd_file_name = "./mlmd" + if self.args.artifact_name == "": + raise ArtifactNotFound("") if self.args.file_name: mlmd_file_name = self.args.file_name if mlmd_file_name == "mlmd": mlmd_file_name = "./mlmd" current_directory = os.path.dirname(mlmd_file_name) if not os.path.exists(mlmd_file_name): - return f"ERROR: {mlmd_file_name} doesn't exists in {current_directory} directory." + raise FileNotFound(mlmd_file_name) # Creating cmfquery object query = cmfquery.CmfQuery(mlmd_file_name) @@ -61,10 +64,9 @@ def run(self): if(artifact_id != -1): df = df.query(f'id == {int(artifact_id)}') else: - df = "Artifact name does not exist.." + raise ArtifactNotFound else: - df = "Pipeline does not exist..." - + raise PipelineNameNotFound(self.args.pipeline_name) if not isinstance(df, str): if self.args.long: pd.set_option('display.max_rows', None) # Set to None to display all rows diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index 482a657a..a7268e01 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -28,7 +28,8 @@ ) from cmflib.cli.command import CmdBase from cmflib.utils.dvc_config import DvcConfig -from cmflib.cmf_exception_handling import MissingRequiredArgument, Minios3ServerInactive, FileNotFound, ExecutionsNotFound, ArtifactNotFound +from cmflib.cmf_exception_handling import PipelineNameNotFound, Minios3ServerInactive, FileNotFound, ExecutionsNotFound, ArtifactNotFound, BatchDownloadFailure, BatchDownloadSuccess,ObjectDownloadFailure, ObjectDownloadSuccess +from cmflib.cli.utils import check_minio_server class CmdArtifactPull(CmdBase): @@ -166,18 +167,27 @@ def run(self): if mlmd_file_name == "mlmd": mlmd_file_name = "./mlmd" current_directory = os.path.dirname(mlmd_file_name) - if self.args.pipeline_name == "": - raise MissingRequiredArgument - if not os.path.exists(mlmd_file_name): + if not os.path.exists(mlmd_file_name): #checking if MLMD files exists raise FileNotFound(mlmd_file_name) + if self.args.artifact_name == "": + raise ArtifactNotFound("") + if self.args.pipeline_name == "": #checking if pipeline_name is not "" + raise PipelineNameNotFound(self.args.pipeline_name) query = cmfquery.CmfQuery(mlmd_file_name) - if not query.get_pipeline_id(self.args.pipeline_name) > 0: - raise MissingRequiredArgument(self.args.pipeline_name) + if not query.get_pipeline_id(self.args.pipeline_name) > 0: #checking if pipeline name exists in mlmd + raise PipelineNameNotFound(self.args.pipeline_name) + + # dvc_config_op = DvcConfig.get_dvc_config() + # cmf_config_file = os.environ.get("CONFIG_FILE", ".cmfconfig") + # cmf_config={} + # cmf_config=CmfConfig.read_config(cmf_config_file) + # out_msg = check_minio_server(dvc_config_op) + # if dvc_config_op["core.remote"] == "minio" and out_msg != "SUCCESS": #checking if minios3 server is active + # raise Minios3ServerInactive() # getting all pipeline stages[i.e Prepare, Featurize, Train and Evaluate] stages = query.get_pipeline_stages(self.args.pipeline_name) executions = [] identifiers = [] - for stage in stages: # getting all executions for stages executions = query.get_all_executions_in_stage(stage) @@ -190,7 +200,6 @@ def run(self): identifiers.append(id) else: print("No Executions found for " + stage + " stage.") - # created dictionary name_url_dict = {} if len(identifiers) == 0: # check if there are no executions @@ -204,15 +213,15 @@ def run(self): # print(name_url_dict) # name_url_dict = ('artifacts/parsed/test.tsv:6f597d341ceb7d8fbbe88859a892ef81', 'Test-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81' # name_url_dict = ('artifacts/parsed/test.tsv:6f597d341ceb7d8fbbe88859a892ef81', 'Test-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81,Second-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81') - output = DvcConfig.get_dvc_config() # pulling dvc config if type(output) is not dict: return output dvc_config_op = output - + total_files_count = 0 + files_download_completed = 0 if dvc_config_op["core.remote"] == "minio": minio_class_obj = minio_artifacts.MinioArtifacts() - if self.args.artifact_name: + if self.args.artifact_name: #checking if artifact_name is in mlmd output = self.search_artifact(name_url_dict) # output[0] = name # output[1] = url @@ -220,61 +229,106 @@ def run(self): raise ArtifactNotFound(self.args.artifact_name) else: minio_args = self.extract_repo_args("minio", output[0], output[1], current_directory) - stmt = minio_class_obj.download_artifacts( + total_files_in_directory,file_downloaded,return_code = minio_class_obj.download_artifacts( dvc_config_op, current_directory, minio_args[0], # bucket_name minio_args[1], # object_name minio_args[2], # path_name ) - return stmt + file_failed_to_download = total_files_in_directory - file_downloaded + if not minio_args[0].endswith(".dir"): + if return_code == 206: + file_downloaded = 1 + else: + file_failed_to_downloaded = 1 + + if return_code == 206: + status = BatchDownloadSuccess(file_downloaded) + else: + status = BatchDownloadFailure(total_files_in_directory, file_failed_to_downloaded) + return status else: - count_download_started = 0 - count_download_completed = 0 for name, url in name_url_dict.items(): if not isinstance(url, str): continue minio_args = self.extract_repo_args("minio", name, url, current_directory) - count_download_started += 1 - return_code, stmt = minio_class_obj.download_artifacts( + if not minio_args[1].endswith(".dir"): + total_files_count += 1 + total_files_in_dir,count_files_success,return_code = minio_class_obj.download_artifacts( dvc_config_op, current_directory, minio_args[0], # bucket_name minio_args[1], # object_name minio_args[2], # path_name ) - if return_code == 2: - count_download_completed += 1 - - temp = f"files downloaded = {count_download_completed }. Files failed to download = {count_download_started - count_download_completed}" - return temp + total_files_count += total_files_in_dir + files_download_completed += count_files_success + #print(total_files_in_dir,count_files_success,"total_files_in_dir,count_files_success") + if return_code == 206 and not minio_args[1].endswith(".dir") : + files_download_completed += 1 + files_downloaded = files_download_completed + count_files_success + Files_failed_to_download = total_files_in_dir + total_files_count - files_download_completed - count_files_success + if Files_failed_to_download == 0: + status = BatchDownloadSuccess(files_downloaded=files_downloaded) + else: + status = BatchDownloadFailure(files_downloaded=files_downloaded, Files_failed_to_download= Files_failed_to_download) + return status elif dvc_config_op["core.remote"] == "local-storage": local_class_obj = local_artifacts.LocalArtifacts() if self.args.artifact_name: output = self.search_artifact(name_url_dict) # output[0] = name # output[1] = url + if output is None: raise ArtifactNotFound(self.args.artifact_name) else: local_args = self.extract_repo_args("local", output[0], output[1], current_directory) - stmt = local_class_obj.download_artifacts( + total_files_in_directory,file_downloaded,return_code = local_class_obj.download_artifacts( dvc_config_op, current_directory, local_args[0], local_args[1] ) - print(stmt) + file_failed_to_download = total_files_in_directory - file_downloaded + if not local_args[0].endswith(".dir"): + if return_code ==206: + file_downloaded = 1 + else: + file_failed_to_downloaded = 1 + + if return_code == 206: + status = BatchDownloadSuccess(file_downloaded) + else: + status = BatchDownloadFailure(total_files_in_directory, file_failed_to_downloaded) + return status else: for name, url in name_url_dict.items(): - #print(name, url) + print("1") if not isinstance(url, str): continue local_args = self.extract_repo_args("local", name, url, current_directory) + if not local_args[1].endswith(".dir"): + total_files_count += 1 # local_args[0] = current dvc location # local_args[1] = current download location - stmt = local_class_obj.download_artifacts( + total_files_in_dir,count_files_success,return_code = local_class_obj.download_artifacts( dvc_config_op, current_directory, local_args[0], local_args[1] ) - print(stmt) - return "Done" + # print(total_files_in_dir,count_files_success,return_code, stmt) + total_files_count += total_files_in_dir + files_download_completed += count_files_success + #print(total_files_in_dir,count_files_success,"total_files_in_dir,count_files_success") + if return_code == 206 and not local_args[1].endswith(".dir") : + files_download_completed += 1 + files_downloaded = files_download_completed + count_files_success + Files_failed_to_download = total_files_in_dir + total_files_count - files_download_completed - count_files_success + if Files_failed_to_download == 0: + print("2") + status = BatchDownloadSuccess(files_downloaded=files_downloaded) + else: + print("3") + status = BatchDownloadFailure(files_downloaded=files_downloaded, Files_failed_to_download= Files_failed_to_download) + print("4") + return status elif dvc_config_op["core.remote"] == "ssh-storage": sshremote_class_obj = sshremote_artifacts.SSHremoteArtifacts() if self.args.artifact_name: @@ -285,29 +339,52 @@ def run(self): raise ArtifactNotFound(self.args.artifact_name) else: args = self.extract_repo_args("ssh", output[0], output[1], current_directory) - stmt = sshremote_class_obj.download_artifacts( + total_files_in_directory,file_downloaded,return_code = sshremote_class_obj.download_artifacts( dvc_config_op, args[0], # host, current_directory, args[1], # remote_loc of the artifact args[2] # name ) - print(stmt) + file_failed_to_download = total_files_in_directory - file_downloaded + if not args[0].endswith(".dir"): + if return_code == 206: + file_downloaded = 1 + else: + file_failed_to_downloaded = 1 + + if return_code == 206: + status = BatchDownloadSuccess(file_downloaded) + else: + status = BatchDownloadFailure(total_files_in_directory, file_failed_to_downloaded) + return status else: for name, url in name_url_dict.items(): #print(name, url) if not isinstance(url, str): continue args = self.extract_repo_args("ssh", name, url, current_directory) - stmt = sshremote_class_obj.download_artifacts( + if not args[1].endswith(".dir"): + total_files_count += 1 + total_files_in_dir,count_files_success,return_code = sshremote_class_obj.download_artifacts( dvc_config_op, args[0], # host, current_directory, args[1], # remote_loc of the artifact args[2] # name ) - print(stmt) - return "Done" + total_files_count += total_files_in_dir + files_download_completed += count_files_success + #print(total_files_in_dir,count_files_success,"total_files_in_dir,count_files_success") + if return_code == 206 and not args[1].endswith(".dir") : + files_download_completed += 1 + files_downloaded = files_download_completed + count_files_success + Files_failed_to_download = total_files_in_dir + total_files_count - files_download_completed - count_files_success + if Files_failed_to_download == 0: + status = BatchDownloadSuccess(files_downloaded=files_downloaded) + else: + status = BatchDownloadFailure(files_downloaded=files_downloaded, Files_failed_to_download= Files_failed_to_download) + return status elif dvc_config_op["core.remote"] == "osdf": #Regenerate Token for OSDF from cmflib.utils.helper_functions import generate_osdf_token @@ -338,29 +415,42 @@ def run(self): raise ArtifactNotFound(self.args.artifact_name) else: args = self.extract_repo_args("osdf", output[0], output[1], current_directory) - stmt = osdfremote_class_obj.download_artifacts( + return_code = osdfremote_class_obj.download_artifacts( dvc_config_op, args[0], # s_url of the artifact current_directory, args[1], # download_loc of the artifact args[2] # name of the artifact ) - print(stmt) + + if return_code == 206: + status = ObjectDownloadSuccess(args[0],args[1]) + else: + status = ObjectDownloadFailure(args[0],args[1]) + return status else: for name, url in name_url_dict.items(): + total_files_count += 1 #print(name, url) if not isinstance(url, str): continue args = self.extract_repo_args("osdf", name, url, current_directory) - stmt = osdfremote_class_obj.download_artifacts( + + return_code = osdfremote_class_obj.download_artifacts( dvc_config_op, args[0], # host, current_directory, args[1], # remote_loc of the artifact args[2] # name ) - print(stmt) - return "Done" + if return_code == 206: + file_downloaded +=1 + Files_failed_to_download = total_files_count - files_downloaded + if Files_failed_to_download == 0: + status = BatchDownloadSuccess(files_downloaded=files_downloaded) + else: + status = BatchDownloadFailure(files_downloaded=files_downloaded, Files_failed_to_download= Files_failed_to_download) + return status elif dvc_config_op["core.remote"] == "amazons3": amazonS3_class_obj = amazonS3_artifacts.AmazonS3Artifacts() if self.args.artifact_name: @@ -372,29 +462,52 @@ def run(self): else: args = self.extract_repo_args("amazons3", output[0], output[1], current_directory) if args[0] and args[1] and args[2]: - stmt = amazonS3_class_obj.download_artifacts( + total_files_in_directory,file_downloaded,return_code = amazonS3_class_obj.download_artifacts( dvc_config_op, current_directory, args[0], # bucket_name args[1], # object_name args[2], # download_loc ) - print(stmt) + file_failed_to_download = total_files_in_directory - file_downloaded + if not args[0].endswith(".dir"): + if return_code ==206: + file_downloaded = 1 + else: + file_failed_to_download = 1 + + if return_code == 206: + status = BatchDownloadSuccess(file_downloaded) + else: + status = BatchDownloadFailure(total_files_in_directory,file_failed_to_download) + return status else: for name, url in name_url_dict.items(): if not isinstance(url, str): continue args = self.extract_repo_args("amazons3", name, url, current_directory) + if not args[1].endswith(".dir"): + total_files_count += 1 if args[0] and args[1] and args[2]: - stmt = amazonS3_class_obj.download_artifacts( + total_files_in_dir,count_files_success,return_code = amazonS3_class_obj.download_artifacts( dvc_config_op, current_directory, args[0], # bucket_name args[1], # object_name args[2], # download_loc ) - print(stmt) - return "Done" + total_files_count += total_files_in_dir + files_download_completed += count_files_success + #print(total_files_in_dir,count_files_success,"total_files_in_dir,count_files_success") + if return_code == 206 and not args[1].endswith(".dir") : + files_download_completed += 1 + files_downloaded = files_download_completed + count_files_success + Files_failed_to_download = total_files_in_dir + total_files_count - files_download_completed - count_files_success + if Files_failed_to_download == 0: + status = BatchDownloadSuccess(files_downloaded=files_downloaded) + else: + status = BatchDownloadFailure(files_downloaded=files_downloaded, Files_failed_to_download= Files_failed_to_download) + return status else: remote = dvc_config_op["core.remote"] msg = f"{remote} is not valid artifact repository for CMF.\n Reinitialize CMF." diff --git a/cmflib/commands/artifact/push.py b/cmflib/commands/artifact/push.py index 8fcd6b83..2c2cc09d 100644 --- a/cmflib/commands/artifact/push.py +++ b/cmflib/commands/artifact/push.py @@ -27,7 +27,7 @@ from cmflib.dvc_wrapper import dvc_push from cmflib.dvc_wrapper import dvc_add_attribute from cmflib.utils.cmf_config import CmfConfig -from cmflib.cmf_exception_handling import MissingRequiredArgument, Minios3ServerInactive, FileNotFound, ExecutionsNotFound +from cmflib.cmf_exception_handling import PipelineNameNotFound, Minios3ServerInactive, FileNotFound, ExecutionsNotFound class CmdArtifactPush(CmdBase): def run(self): @@ -65,7 +65,7 @@ def run(self): # Put a check to see whether pipline exists or not pipeline_name = self.args.pipeline_name if not query.get_pipeline_id(pipeline_name) > 0: - raise MissingRequiredArgument(pipeline_name) + raise PipelineNameNotFound(pipeline_name) stages = query.get_pipeline_stages(self.args.pipeline_name) executions = [] @@ -112,7 +112,6 @@ def run(self): pass #print("file_set = ", final_list) result = dvc_push(list(final_list)) - print(result,"result") return result def add_parser(subparsers, parent_parser): diff --git a/cmflib/commands/init/amazonS3.py b/cmflib/commands/init/amazonS3.py index cdfc3826..bb7b1f9b 100644 --- a/cmflib/commands/init/amazonS3.py +++ b/cmflib/commands/init/amazonS3.py @@ -30,6 +30,7 @@ ) from cmflib.utils.cmf_config import CmfConfig from cmflib.utils.helper_functions import is_git_repo +from cmflib.cmf_exception_handling import ArgumentNotProvided, CmfInitComplete, CmfInitFailed class CmdInitAmazonS3(CmdBase): def run(self): @@ -62,7 +63,7 @@ def run(self): ): pass else: - return "ERROR: Provide user, password and uri for neo4j initialization." + raise ArgumentNotProvided output = is_git_repo() if not output: @@ -79,12 +80,14 @@ def run(self): repo_type = "amazons3" output = dvc_add_remote_repo(repo_type, self.args.url) if not output: - return "cmf init failed." + raise CmfInitFailed print(output) + print("1") dvc_add_attribute(repo_type, "access_key_id", self.args.access_key_id) dvc_add_attribute(repo_type, "secret_access_key", self.args.secret_key) dvc_add_attribute(repo_type, "session_token", self.args.session_token) - return "cmf init complete." + status = CmfInitComplete() + return status def add_parser(subparsers, parent_parser): diff --git a/cmflib/commands/init/local.py b/cmflib/commands/init/local.py index 741ccc19..1086e9ce 100644 --- a/cmflib/commands/init/local.py +++ b/cmflib/commands/init/local.py @@ -17,7 +17,7 @@ #!/usr/bin/env python3 import argparse import os - +from cmflib.cmf_exception_handling import ArgumentNotProvided, CmfInitComplete, CmfInitFailed from cmflib.cli.command import CmdBase from cmflib.dvc_wrapper import ( git_quiet_init, @@ -61,7 +61,7 @@ def run(self): ): pass else: - return "ERROR: Provide user, password and uri for neo4j initialization." + raise ArgumentNotProvided output = is_git_repo() @@ -79,9 +79,10 @@ def run(self): repo_type = "local-storage" output = dvc_add_remote_repo(repo_type, self.args.path) if not output: - return "cmf init failed." + raise CmfInitFailed print(output) - return "cmf init complete." + status = CmfInitComplete() + return status def add_parser(subparsers, parent_parser): diff --git a/cmflib/commands/init/minioS3.py b/cmflib/commands/init/minioS3.py index 345484a0..4e859458 100644 --- a/cmflib/commands/init/minioS3.py +++ b/cmflib/commands/init/minioS3.py @@ -30,7 +30,7 @@ ) from cmflib.utils.cmf_config import CmfConfig from cmflib.utils.helper_functions import is_git_repo - +from cmflib.cmf_exception_handling import ArgumentNotProvided, CmfInitComplete, CmfInitFailed class CmdInitMinioS3(CmdBase): def run(self): @@ -63,7 +63,7 @@ def run(self): ): pass else: - return "ERROR: Provide user, password and uri for neo4j initialization." + raise ArgumentNotProvided output = is_git_repo() if not output: branch_name = "master" @@ -79,12 +79,14 @@ def run(self): repo_type = "minio" output = dvc_add_remote_repo(repo_type, self.args.url) if not output: - return "cmf init failed." + raise CmfInitFailed print(output) dvc_add_attribute(repo_type, "endpointurl", self.args.endpoint_url) dvc_add_attribute(repo_type, "access_key_id", self.args.access_key_id) dvc_add_attribute(repo_type, "secret_access_key", self.args.secret_key) - return "cmf init complete." + status = CmfInitComplete() + return status + def add_parser(subparsers, parent_parser): diff --git a/cmflib/commands/init/show.py b/cmflib/commands/init/show.py index fa6e7d84..07e01081 100644 --- a/cmflib/commands/init/show.py +++ b/cmflib/commands/init/show.py @@ -22,6 +22,7 @@ from cmflib.cli.utils import find_root from cmflib.dvc_wrapper import dvc_get_config from cmflib.utils.cmf_config import CmfConfig +from cmflib.cmf_exception_handling import CmfNotConfigured, CmfInitShow class CmdInitShow(CmdBase): def run(self): @@ -29,11 +30,11 @@ def run(self): msg = "'cmf' is not configured.\nExecute 'cmf init' command." result = dvc_get_config() if len(result) == 0: - return msg + return CmfNotConfigured(msg) else: cmf_config_root = find_root(cmfconfig) if cmf_config_root.find("'cmf' is not configured") != -1: - return msg + return CmfNotConfigured(msg) config_file_path = os.path.join(cmf_config_root, cmfconfig) attr_dict = CmfConfig.read_config(config_file_path) attr_list = [] @@ -41,7 +42,7 @@ def run(self): temp_str = f"{key} = {value}" attr_list.append(temp_str) attr_str = "\n".join(attr_list) - return f"{result}\n{attr_str}" + return CmfInitShow(result,attr_str) def add_parser(subparsers, parent_parser): diff --git a/cmflib/commands/init/sshremote.py b/cmflib/commands/init/sshremote.py index 245fe2f2..73afb28b 100644 --- a/cmflib/commands/init/sshremote.py +++ b/cmflib/commands/init/sshremote.py @@ -31,6 +31,7 @@ ) from cmflib.utils.cmf_config import CmfConfig from cmflib.utils.helper_functions import is_git_repo +from cmflib.cmf_exception_handling import ArgumentNotProvided, CmfInitComplete, CmfInitFailed class CmdInitSSHRemote(CmdBase): def run(self): @@ -63,7 +64,7 @@ def run(self): ): pass else: - return "ERROR: Provide user, password and uri for neo4j initialization." + raise ArgumentNotProvided output = is_git_repo() if not output: branch_name = "master" @@ -79,12 +80,14 @@ def run(self): dvc_quiet_init() output = dvc_add_remote_repo(repo_type, self.args.path) if not output: - return "cmf init failed." + raise CmfInitFailed print(output) dvc_add_attribute(repo_type, "user", self.args.user) dvc_add_attribute(repo_type, "password", self.args.password) dvc_add_attribute(repo_type, "port", self.args.port) - return "cmf init complete." + status = CmfInitComplete() + return status + def add_parser(subparsers, parent_parser): diff --git a/cmflib/commands/metadata/pull.py b/cmflib/commands/metadata/pull.py index 7c50cbf7..aaee0959 100644 --- a/cmflib/commands/metadata/pull.py +++ b/cmflib/commands/metadata/pull.py @@ -20,26 +20,22 @@ from cmflib import cmf_merger from cmflib.cli.command import CmdBase from cmflib.cli.utils import find_root +from cmflib import cmfquery from cmflib.server_interface import server_interface from cmflib.utils.cmf_config import CmfConfig - - +from cmflib.cmf_exception_handling import PipelineNameNotFound, CmfNotConfigured, FileNotFound, ExecutionIDNotFound, MlmdNotFoundOnServer, MlmdFilePulledSuccess, CmfServerNotAvailable, InternalServerError, MlmdFilePulledFailure, CurrentDirectoryNotfound, FileNameNotfound # This class pulls mlmd file from cmf-server class CmdMetadataPull(CmdBase): def run(self): cmfconfig = os.environ.get("CONFIG_FILE", ".cmfconfig") - # find root_dir of .cmfconfig output = find_root(cmfconfig) - # in case, there is no .cmfconfig file if output.find("'cmf' is not configured") != -1: - return output - + raise CmfNotConfigured(output) config_file_path = os.path.join(output, cmfconfig) attr_dict = CmfConfig.read_config(config_file_path) url = attr_dict.get("cmf-server-ip", "http://127.0.0.1:80") - current_directory = os.getcwd() full_path_to_dump = "" cmd = "pull" @@ -53,9 +49,9 @@ def run(self): if os.path.exists(current_directory): full_path_to_dump = self.args.file_name else: - return f"{current_directory} doesn't exists." + raise CurrentDirectoryNotfound(current_dir= current_directory) else: - return "Provide path with file name." + raise FileNameNotfound else: full_path_to_dump = os.getcwd() + "/mlmd" if self.args.execution: @@ -66,27 +62,37 @@ def run(self): status = output.status_code # checks If given pipeline does not exists/ elif pull mlmd file/ else mlmd file is not available if output.content.decode() == None: - return "Pipeline name " + self.args.pipeline_name + " doesn't exist." + raise PipelineNameNotFound(self.args.pipeline_name) elif output.content.decode() == "no_exec_id": - return f"Error: Execution id {exec_id} is not present in mlmd." + raise ExecutionIDNotFound(exec_id) + elif output.content: - try: - cmf_merger.parse_json_to_mlmd( - output.content, full_path_to_dump, cmd, None - ) # converts mlmd json data to mlmd file - except Exception as e: - return e - # verifying status codes if status == 200: - return f"SUCCESS: {full_path_to_dump} is successfully pulled." + try: + cmf_merger.parse_json_to_mlmd( + output.content, full_path_to_dump, cmd, None + ) # converts mlmd json data to mlmd file + pull_status = MlmdFilePulledSuccess(full_path_to_dump) + return pull_status + except Exception as e: + return e + elif status == 413: + raise MlmdNotFoundOnServer + elif status == 406: + raise PipelineNameNotFound(self.args.pipeline_name) elif status == 404: - return "ERROR: cmf-server is not available." + raise CmfServerNotAvailable elif status == 500: - return "ERROR: Internal server error." + raise InternalServerError else: - return "ERROR: Unable to pull mlmd." - else: - return "mlmd file not available on cmf-server." + raise MlmdFilePulledFailure + + + + # verifying status codes + # else: + # print("4") + # raise MlmdNotFoundOnServer def add_parser(subparsers, parent_parser): diff --git a/cmflib/commands/metadata/push_old.py b/cmflib/commands/metadata/push_old.py new file mode 100644 index 00000000..6498b571 --- /dev/null +++ b/cmflib/commands/metadata/push_old.py @@ -0,0 +1,178 @@ +### +# Copyright (2023) Hewlett Packard Enterprise Development LP +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +### + +#!/usr/bin/env python3 +import argparse +import os +import json +from cmflib import cmfquery +from cmflib.cli.command import CmdBase +from cmflib.cli.utils import find_root +from cmflib.server_interface import server_interface +from cmflib.utils.cmf_config import CmfConfig +from cmflib.cmf_exception_handling import FileNotFound, ExecutionIDNotFound, PipelineNameNotFound, MlmdFilePulledSuccess, ExecutionsAlreadyExists, UpdateCmfVersion +# This class pushes mlmd file to cmf-server +class CmdMetadataPush(CmdBase): + def run(self): + current_directory = os.getcwd() + mlmd_file_name = "./mlmd" + + # checks if mlmd filepath is given + if self.args.file_name: + mlmd_file_name = self.args.file_name + current_directory = os.path.dirname(self.args.file_name) + + # checks if mlmd file is present in current directory or given directory + if not os.path.exists(mlmd_file_name): + raise FileNotFound(mlmd_file_name) + + query = cmfquery.CmfQuery(mlmd_file_name) + # print(json.dumps(json.loads(json_payload), indent=4, sort_keys=True)) + execution_flag = 0 + status_code = 0 + + # Get url from config + cmfconfig = os.environ.get("CONFIG_FILE",".cmfconfig") + + # find root_dir of .cmfconfig + output = find_root(cmfconfig) + + # in case, there is no .cmfconfig file + if output.find("'cmf' is not configured") != -1: + return output + + config_file_path = os.path.join(output, cmfconfig) + attr_dict = CmfConfig.read_config(config_file_path) + url = attr_dict.get("cmf-server-ip", "http://127.0.0.1:80") + + print("metadata push started") + print("........................................") + + # Checks if pipeline name exists + if self.args.pipeline_name in query.get_pipeline_names(): + # converts mlmd file to json format + json_payload = query.dumptojson(self.args.pipeline_name, None) + # checks if execution_id is given by user + if self.args.execution: + exec_id = self.args.execution + mlmd_data = json.loads(json_payload)["Pipeline"] + # checks if given execution_id present in mlmd + for i in mlmd_data[0]["stages"]: + for j in i["executions"]: + if j["id"] == int(exec_id): + execution_flag = 1 + # calling mlmd_push api to push mlmd file to cmf-server + response = server_interface.call_mlmd_push( + json_payload, url, exec_id, self.args.pipeline_name + ) + break + if execution_flag == 0: + raise ExecutionIDNotFound(exec_id) + else: + exec_id = None + response = server_interface.call_mlmd_push(json_payload, url, exec_id, self.args.pipeline_name) + status_code = response.status_code + + if status_code == 200 and response.json()['status']=="success": + pass + #return MlmdFilePulledSuccess(self.args.filename).handle() + elif status_code==200 and response.json()["status"]=="exists": + raise ExecutionsAlreadyExists + elif status_code==422 and response.json()["status"]=="version_update": + raise UpdateCmfVersion + elif status_code == 404: + return "ERROR: cmf-server is not available." + elif status_code == 500: + return "ERROR: Internal server error." + else: + return "ERROR: Status Code = {status_code}. Unable to push mlmd." + + if self.args.tensorboard: + # /tensorboard api call is done only if mlmd push is successfully completed + # tensorboard parameter is passed + print("......................................") + print("tensorboard logs upload started!!") + print("......................................") + + # check if the path provided is for a file + if os.path.isfile(self.args.tensorboard): + file_name = os.path.basename(self.args.tensorboard) + tresponse = server_interface.call_tensorboard(url, self.args.pipeline_name, file_name, self.args.tensorboard) + tstatus_code = tresponse.status_code + if tstatus_code == 200: + print("tensorboard logs: file {file_name} pushed successfully") + else: + return "ERROR: Failed to upload file {file_name}. Server response: {response.text}" + # If path provided is a directory + elif os.path.isdir(self.args.tensorboard): + # Recursively push all files and subdirectories + for root, dirs, files in os.walk(self.args.tensorboard): + for file_name in files: + file_path = os.path.join(root, file_name) + relative_path = os.path.relpath(file_path, self.args.tensorboard) + tresponse = server_interface.call_tensorboard(url, self.args.pipeline_name, relative_path, file_path) + if tresponse.status_code == 200: + print(f"tensorboard logs: File {file_name} uploaded successfully.") + else: + return f"ERROR: Failed to upload file {file_name}. Server response: {tresponse.text}" + return f"tensorboard logs: {self.args.tensorboard} uploaded successfully!!" + else: + return "ERROR: Invalid data path. Provide valid file/folder path for tensorboard logs!!" + else: + return MlmdFilePulledSuccess(self.args.filename).handle() + else: + raise PipelineNameNotFound(self.args.pipeline_name) + + +def add_parser(subparsers, parent_parser): + PUSH_HELP = "Push user-generated mlmd to server to create one single mlmd file for all the pipelines." + + parser = subparsers.add_parser( + "push", + parents=[parent_parser], + description="Push user's mlmd to cmf-server.", + help=PUSH_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + required_arguments = parser.add_argument_group("required arguments") + + required_arguments.add_argument( + "-p", + "--pipeline_name", + required=True, + help="Specify Pipeline name.", + metavar="", + ) + + parser.add_argument( + "-f", "--file_name", help="Specify mlmd file name.", metavar="" + ) + + parser.add_argument( + "-e", + "--execution", + help="Specify Execution id.", + metavar="", + ) + + parser.add_argument( + "-t", + "--tensorboard", + help="Specify path to tensorboard logs for the pipeline.", + metavar="" + ) + + parser.set_defaults(func=CmdMetadataPush) diff --git a/cmflib/storage_backends/amazonS3_artifacts.py b/cmflib/storage_backends/amazonS3_artifacts.py index 194641ca..3fe912d3 100644 --- a/cmflib/storage_backends/amazonS3_artifacts.py +++ b/cmflib/storage_backends/amazonS3_artifacts.py @@ -45,6 +45,10 @@ def download_artifacts( os.makedirs(dir_path, mode=0o777, exist_ok=True) # creating subfolders if needed response = "" + total_files_in_directory = 0 + file_download_success = 0 + download_success_return_code = 206 + download_failure_return_code = 207 """" if object_name ends with .dir - it is a directory. @@ -57,7 +61,11 @@ def download_artifacts( # download .dir object temp_dir = f"{download_loc}/temp_dir" - response = s3.download_file(bucket_name, object_name, temp_dir) + try: + response = s3.download_file(bucket_name, object_name, temp_dir) + except Exception as e: + print(f"object {object_name} is not downloaded.") + return total_files_in_directory,file_download_success,download_failure_return_code with open(temp_dir, 'r') as file: tracked_files = eval(file.read()) @@ -72,8 +80,10 @@ def download_artifacts( we need to remove the hash of the .dir from the object_name which will leave us with the artifact repo path """ + file_download_failure = 0 repo_path = "/".join(object_name.split("/")[:-2]) for file_info in tracked_files: + total_files_in_directory += 1 relpath = file_info['relpath'] md5_val = file_info['md5'] # download_loc = /home/user/datatslice/example-get-started/test/artifacts/raw_data @@ -82,15 +92,34 @@ def download_artifacts( formatted_md5 = md5_val[:2] + '/' + md5_val[2:] temp_download_loc = f"{download_loc}/{relpath}" temp_object_name = f"{repo_path}/{formatted_md5}" - obj = s3.download_file(bucket_name, temp_object_name, temp_download_loc) - if obj == None: - print(f"object {temp_object_name} downloaded at {temp_download_loc}.") + try: + obj = s3.download_file(bucket_name, temp_object_name, temp_download_loc) + if obj == None: + file_download_success += 1 + print(f"object {temp_object_name} downloaded at {temp_download_loc}.") + else: + print(f"object {temp_object_name} is not downloaded.") + file_download_failure += 1 + except Exception as e: + print(f"object {temp_object_name} is not downloaded.") + file_download_failure += 1 + if file_download_failure == 0: # if count_failed is 0 it means all the objects of directory are downloaded + response = None + else: # if count_failed is greater than 0 it means some artifacts or all are not downloaded + response = False else: # download objects which are file - response = s3.download_file(bucket_name, object_name, download_loc) + try: + response = s3.download_file(bucket_name, object_name, download_loc) + except Exception as e: + print(f"object {object_name} is not downloaded.") + return total_files_in_directory,file_download_success,download_failure_return_code if response == None: - return f"{object_name} downloaded at {download_loc}" - return response + print(f"object {object_name} downloaded at {download_loc}.") + return total_files_in_directory,file_download_success, download_success_return_code + if response == False: + print(f"object {object_name} is not downloaded.") + return total_files_in_directory,file_download_success,download_failure_return_code except s3.exceptions.ClientError as e: # If a specific error code is returned, the bucket does not exist diff --git a/cmflib/storage_backends/local_artifacts.py b/cmflib/storage_backends/local_artifacts.py index 53eda128..2fdb145b 100644 --- a/cmflib/storage_backends/local_artifacts.py +++ b/cmflib/storage_backends/local_artifacts.py @@ -16,8 +16,7 @@ import os from dvc.api import DVCFileSystem -from cmflib.cmf_exception_handling import CmfException -from cmflib.cmf_success_codes import StatusCodes +from cmflib.cmf_exception_handling import ObjectDownloadSuccess, ObjectDownloadFailure class LocalArtifacts: def download_artifacts( @@ -40,7 +39,6 @@ def download_artifacts( dir_path, _ = download_loc.rsplit("/", 1) if dir_path != "": os.makedirs(dir_path, mode=0o777, exist_ok=True) # creating subfolders if needed - status_code = StatusCodes() response = "" @@ -49,17 +47,24 @@ def download_artifacts( we download .dir object with 'temp_dir' and remove this after all the files from this .dir object is downloaded. """ + total_files_in_directory = 0 + file_download_success = 0 + download_success_return_code = 206 + download_failure_return_code = 207 if object_name.endswith('.dir'): + print("inside") # in case of .dir, download_loc is a absolute path for a folder os.makedirs(download_loc, mode=0o777, exist_ok=True) - + # download the .dir object temp_dir = f"{download_loc}/dir" - response = fs.get_file(object_name, temp_dir) - + try: + response = fs.get_file(object_name, temp_dir) + except Exception as e: + print(f"object {object_name} is not downloaded.") + return total_files_in_directory,file_download_success,download_failure_return_code with open(temp_dir, 'r') as file: tracked_files = eval(file.read()) - # removing temp_dir if os.path.exists(temp_dir): os.remove(temp_dir) @@ -71,7 +76,10 @@ def download_artifacts( which will leave us with the artifact repo path """ repo_path = "/".join(object_name.split("/")[:-2]) + file_download_failure = 0 + for file_info in tracked_files: + total_files_in_directory += 1 relpath = file_info['relpath'] md5_val = file_info['md5'] # md5_val = a237457aa730c396e5acdbc5a64c8453 @@ -79,15 +87,33 @@ def download_artifacts( formatted_md5 = md5_val[:2] + '/' + md5_val[2:] temp_object_name = f"{repo_path}/{formatted_md5}" temp_download_loc = f"{download_loc}/{relpath}" - obj = fs.get_file(temp_object_name, temp_download_loc) - if obj == None: - print(f"object {temp_object_name} downloaded at {temp_download_loc}.") + try: + obj = fs.get_file(temp_object_name, temp_download_loc) + if obj == None: + file_download_success += 1 + print(f"object {temp_object_name} downloaded at {temp_download_loc}.") + else: + print(f"object {temp_object_name} is not downloaded.") + file_download_failure += 1 + except Exception as e: + print(f"object {temp_object_name} is not downloaded.") + file_download_failure += 1 + if file_download_failure == 0: # if count_failed is 0 it means all the objects of directory are downloaded + response = None + else: # if count_failed is greater than 0 it means some artifacts or all are not downloaded + response = False else: - response = fs.get_file(object_name, download_loc) + try: + response = fs.get_file(object_name, download_loc) + except Exception as e: + print(f"object {object_name} is not downloaded.") + return total_files_in_directory,file_download_success,download_failure_return_code if response == None: # get_file() returns none when file gets downloaded. - return_code, stmt = status_code.get_message(int(2),object_name=object_name,download_loc=download_loc) - - return return_code,stmt + print(f"object {object_name} downloaded at {download_loc}.") + return total_files_in_directory,file_download_success, download_success_return_code + if response == False: + print(f"object {object_name} is not downloaded.") + return total_files_in_directory,file_download_success,download_failure_return_code except TypeError as exception: return exception except Exception as exception: diff --git a/cmflib/storage_backends/minio_artifacts.py b/cmflib/storage_backends/minio_artifacts.py index e9d0efee..166a83ec 100644 --- a/cmflib/storage_backends/minio_artifacts.py +++ b/cmflib/storage_backends/minio_artifacts.py @@ -17,8 +17,6 @@ import os from minio import Minio from minio.error import S3Error -from cmflib.commands.error_handling import handle_error -from cmflib.cmf_success_codes import StatusCodes from cmflib.cmf_exception_handling import BucketNotFound class MinioArtifacts: @@ -38,10 +36,9 @@ def download_artifacts( endpoint, access_key=access_key, secret_key=secret_key, secure=False ) found = client.bucket_exists(bucket_name) - if not found: + if not found: #check if minio bucket exists raise BucketNotFound() - status_code = StatusCodes() response = "" """" @@ -50,6 +47,10 @@ def download_artifacts( this after all the files from this .dir object is downloaded. """ #print("inside download arti") + total_files_in_directory = 0 + file_download_success = 0 + download_success_return_code = 206 + download_failure_return_code = 207 if object_name.endswith('.dir'): print("inside if loop") # in case of .dir, download_loc is a absolute path for a folder @@ -57,8 +58,12 @@ def download_artifacts( # download .dir object temp_dir = f"{download_loc}/temp_dir" - response = client.fget_object(bucket_name, object_name, temp_dir) - + try: + response = client.fget_object(bucket_name, object_name, temp_dir) + except Exception as e: + print(f"object {object_name} is not downloaded.") + return total_files_in_directory,file_download_success,download_failure_return_code + with open(temp_dir, 'r') as file: tracked_files = eval(file.read()) @@ -75,8 +80,11 @@ def download_artifacts( repo_path = object_name.split("/") repo_path = repo_path[:len(repo_path)-2] repo_path = "/".join(repo_path) - count_failed = 0 + file_download_failure = 0 + + for file_info in tracked_files: + total_files_in_directory += 1 relpath = file_info['relpath'] md5_val = file_info['md5'] # download_loc = /home/sharvark/datatslice/example-get-started/test/artifacts/raw_data @@ -85,28 +93,34 @@ def download_artifacts( formatted_md5 = md5_val[:2] + '/' + md5_val[2:] temp_download_loc = f"{download_loc}/{relpath}" temp_object_name = f"{repo_path}/{formatted_md5}" - obj = client.fget_object(bucket_name, temp_object_name, temp_download_loc) - if obj: - print(f"object {temp_object_name} downloaded at {temp_download_loc}.") - else: - count_failed += 1 + try: + obj = client.fget_object(bucket_name, temp_object_name, temp_download_loc) + if obj: + print(f"object {temp_object_name} downloaded at {temp_download_loc}.") + file_download_success += 1 + else: + file_download_failure += 1 + print(f"object {temp_object_name} is not downloaded.") + except Exception as e: print(f"object {temp_object_name} is not downloaded.") - if count_failed == 0: # if count_failed is 0 it means all the objects of directory are downloaded - response = True - else: # if count_failed is greater than 0 it means some artifacts or all are not downloaded - response = False + file_download_failure += 1 + if file_download_failure == 0: # if count_failed is 0 it means all the objects of directory are downloaded + response = True + else: # if count_failed is greater than 0 it means some artifacts or all are not downloaded + response = False else: - print("inside else loop") - response = client.fget_object(bucket_name, object_name, download_loc) + #print("inside else loop") + try: + response = client.fget_object(bucket_name, object_name, download_loc) + except Exception as e: + print(f"object {object_name} is not downloaded.") + return total_files_in_directory,file_download_success,download_failure_return_code if response: - print("insdie if reponse ") - #stmt = f"object {object_name} downloaded at {download_loc}." - return_code, stmt = status_code.get_message(int(2),object_name=object_name,download_loc=download_loc) - return return_code, stmt + print(f"object {object_name} downloaded at {download_loc}.") + return total_files_in_directory,file_download_success, download_success_return_code else: - print("insdie else reponse ") - return_code, stmt = status_code.get_message(int(22),object_name=object_name) - return return_code, stmt + print(f"object {object_name} is not downloaded.") + return total_files_in_directory,file_download_success,download_failure_return_code except TypeError as exception: #print("inside ") return exception diff --git a/cmflib/storage_backends/osdf_artifacts.py b/cmflib/storage_backends/osdf_artifacts.py index ff14e51c..f3d9198d 100644 --- a/cmflib/storage_backends/osdf_artifacts.py +++ b/cmflib/storage_backends/osdf_artifacts.py @@ -18,6 +18,7 @@ import requests #import urllib3 #urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) +from cmflib.cmf_exception_handling import NoDataFoundosdf class OSDFremoteArtifacts: def download_artifacts( @@ -35,7 +36,8 @@ def download_artifacts( custom_auth_header = dvc_config_op["remote.osdf.custom_auth_header"] #print(f"dynamic password from download_artifacts={dynamic_password}") #print(f"Fetching artifact={local_path}, surl={host} to {remote_file_path} when this has been called at {current_directory}") - + download_success_return_code = 206 + download_failure_return_code = 207 try: headers={dvc_config_op["remote.osdf.custom_auth_header"]: dvc_config_op["remote.osdf.password"]} temp = local_path.split("/") @@ -52,17 +54,15 @@ def download_artifacts( if response.status_code == 200 and response.content: data = response.content else: - return "No data received from the server." + raise NoDataFoundosdf except Exception as exception: - return exception + print(exception) try: with open(remote_file_path, 'wb') as file: file.write(data) if os.path.exists(remote_file_path) and os.path.getsize(remote_file_path) > 0: - #print(f"object {local_path} downloaded at {remote_file_path}") - stmt = f"object {local_path} downloaded at {remote_file_path}." - return stmt + return download_success_return_code except Exception as e: - print(f"An error occurred while writing to the file: {e}") + return download_failure_return_code diff --git a/cmflib/storage_backends/sshremote_artifacts.py b/cmflib/storage_backends/sshremote_artifacts.py index 40fea410..3b02911b 100644 --- a/cmflib/storage_backends/sshremote_artifacts.py +++ b/cmflib/storage_backends/sshremote_artifacts.py @@ -52,6 +52,11 @@ def download_artifacts( response = "" abs_download_loc = os.path.abspath(os.path.join(current_directory, download_loc)) + + total_files_in_directory = 0 + file_download_success = 0 + download_success_return_code = 206 + download_failure_return_code = 207 """" if object_name ends with .dir - it is a directory. we download .dir object with 'temp_dir' and remove @@ -63,8 +68,14 @@ def download_artifacts( # download .dir object temp_dir = f"{abs_download_loc}/temp_dir" - response = sftp.put(object_name, temp_dir) - + try: + response = sftp.put(object_name, temp_dir) + except Exception as e: + print(f"object {object_name} is not downloaded.") + sftp.close() + ssh.close() + return total_files_in_directory,file_download_success,download_failure_return_code + with open(temp_dir, 'r') as file: tracked_files = eval(file.read()) @@ -80,6 +91,7 @@ def download_artifacts( """ repo_path = "/".join(object_name.split("/")[:-2]) for file_info in tracked_files: + total_files_in_directory += 1 relpath = file_info['relpath'] md5_val = file_info['md5'] # download_loc = /home/user/datatslice/example-get-started/test/artifacts/raw_data @@ -88,18 +100,39 @@ def download_artifacts( formatted_md5 = md5_val[:2] + '/' + md5_val[2:] temp_download_loc = f"{abs_download_loc}/{relpath}" temp_object_name = f"{repo_path}/{formatted_md5}" - obj = sftp.put(object_name, temp_download_loc) - if obj: - print(f"object {temp_object_name} downloaded at {temp_download_loc}.") + try: + obj = sftp.put(object_name, temp_download_loc) + if obj: + print(f"object {temp_object_name} downloaded at {temp_download_loc}.") + file_download_success += 1 + else: + file_download_failure += 1 + print(f"object {temp_object_name} is not downloaded.") + except Exception as e: + print(f"object {temp_object_name} is not downloaded.") + file_download_failure += 1 + if file_download_failure == 0: # if count_failed is 0 it means all the objects of directory are downloaded + response = True + else: # if count_failed is greater than 0 it means some artifacts or all are not downloaded + response = False else: - response = sftp.put(object_name, abs_download_loc) + try: + response = sftp.put(object_name, abs_download_loc) + except Exception as e: + print(f"object {object_name} is not downloaded.") + sftp.close() + ssh.close() + return total_files_in_directory,file_download_success,download_failure_return_code if response: - stmt = f"object {object_name} downloaded at {abs_download_loc}." - return stmt - - sftp.close() - ssh.close() - + print(f"object {object_name} downloaded at {download_loc}.") + sftp.close() + ssh.close() + return total_files_in_directory,file_download_success, download_success_return_code + else: + print(f"object {object_name} is not downloaded.") + sftp.close() + ssh.close() + return total_files_in_directory,file_download_success,download_failure_return_code except TypeError as exception: return exception except Exception as exception: diff --git a/server/app/get_data.py b/server/app/get_data.py index 0a0b6188..d003b379 100644 --- a/server/app/get_data.py +++ b/server/app/get_data.py @@ -249,8 +249,10 @@ def create_unique_executions(server_store_path, req_info) -> str: # mlmd push is failed here status="version_update" return status + print(executions_client, executions_server,"comparision") if executions_server != []: list_executions_exists = list(set(executions_client).intersection(set(executions_server))) + print(list_executions_exists,"list_executions_exists") for i in mlmd_data["Pipeline"]: for stage in i['stages']: for cmf_exec in stage['executions'][:]: @@ -258,22 +260,26 @@ def create_unique_executions(server_store_path, req_info) -> str: for uuid in uuids: if uuid in list_executions_exists: stage['executions'].remove(cmf_exec) - + for i in mlmd_data["Pipeline"]: i['stages']=[stage for stage in i['stages'] if stage['executions']!=[]] for i in mlmd_data["Pipeline"]: + if len(i['stages']) == 0 : status="exists" else: + print(mlmd_data,"mlmd_data") cmf_merger.parse_json_to_mlmd( json.dumps(mlmd_data), "/cmf-server/data/mlmd", "push", req_info["id"] ) + print("inside success condition") status='success' return status def get_mlmd_from_server(server_store_path: str, pipeline_name: str, exec_id: str): + print("get_mlmd_from_server") query = cmfquery.CmfQuery(server_store_path) json_payload = None df = pd.DataFrame() diff --git a/server/app/main.py b/server/app/main.py index 30df9c5a..2b4fbae9 100644 --- a/server/app/main.py +++ b/server/app/main.py @@ -28,6 +28,7 @@ from server.app.query_execution_lineage_d3tree import query_execution_lineage_d3tree from server.app.query_artifact_lineage_d3tree import query_artifact_lineage_d3tree from server.app.query_visualization_artifact_execution import query_visualization_artifact_execution +from cmflib.cmf_exception_handling import MlmdNotFoundOnServer from pathlib import Path import os import json @@ -119,13 +120,15 @@ async def mlmd_push(info: Request): @app.get("/mlmd_pull/{pipeline_name}", response_class=HTMLResponse) async def mlmd_pull(info: Request, pipeline_name: str): # checks if mlmd file exists on server + print("inside api") req_info = await info.json() if os.path.exists(server_store_path): #json_payload values can be json data, NULL or no_exec_id. json_payload= await async_api(get_mlmd_from_server, server_store_path, pipeline_name, req_info['exec_id']) else: - print("No mlmd file submitted.") - json_payload = "" + raise HTTPException(status_code=413, detail=f"mlmd file not available on cmf-server.") + if json_payload == None: + raise HTTPException(status_code=406, detail=f"Pipeline {pipeline_name} not found.") return json_payload # api to display executions available in mlmd From 2a6e8a8f9ed166d210ce2f1d6f64351f17b3d086 Mon Sep 17 00:00:00 2001 From: abhinav chobey Date: Wed, 4 Dec 2024 22:43:09 -0800 Subject: [PATCH 08/41] made changes to metadata push and renumbered all status codes --- cmflib/cmf_exception_handling.py | 45 +++++++++++-------- cmflib/commands/metadata/push.py | 77 ++++++++++++++++---------------- 2 files changed, 64 insertions(+), 58 deletions(-) diff --git a/cmflib/cmf_exception_handling.py b/cmflib/cmf_exception_handling.py index c4402473..14f94afe 100644 --- a/cmflib/cmf_exception_handling.py +++ b/cmflib/cmf_exception_handling.py @@ -53,14 +53,14 @@ def handle(): class ExecutionsNotFound(CmfFailure): - def __init__(self, return_code=105): + def __init__(self, return_code=104): super().__init__(return_code) def handle(self): return f"Executions not found" class ExecutionIDNotFound(CmfFailure): - def __init__(self,exec_id, return_code=106): + def __init__(self,exec_id, return_code=105): self.exec_id = exec_id super().__init__(return_code) @@ -68,7 +68,7 @@ def handle(self): return f"Error: Execution id {self.exec_id} is not present in mlmd." class ArtifactNotFound(CmfFailure): - def __init__(self,artifact_name, return_code=107): + def __init__(self,artifact_name, return_code=106): self.artifact_name = artifact_name super().__init__(return_code) @@ -86,7 +86,7 @@ def handle(self): return f"object {self.object_name} downloaded at {self.download_loc}." class ObjectDownloadFailure(CmfFailure): - def __init__(self,object_name, return_code=108): + def __init__(self,object_name, return_code=107): self.object_name = object_name super().__init__(return_code) @@ -94,7 +94,7 @@ def handle(self): return f"object {self.object_name} is not downloaded." class BatchDownloadFailure(CmfFailure): - def __init__(self,files_downloaded, Files_failed_to_download, return_code=109): + def __init__(self,files_downloaded, Files_failed_to_download, return_code=108): self.files_downloaded = files_downloaded self.Files_failed_to_download = Files_failed_to_download super().__init__(return_code) @@ -118,7 +118,7 @@ def handle(self): return f"MinioS3 server failed to start!!!" class CmfNotConfigured(CmfFailure): - def __init__(self,message, return_code=111): + def __init__(self,message, return_code=110): self.message = message super().__init__(return_code) @@ -126,7 +126,7 @@ def handle(self): return self.message class MlmdNotFoundOnServer(CmfFailure): - def __init__(self, return_code=113): + def __init__(self, return_code=111): super().__init__(return_code) def handle(self): @@ -151,14 +151,14 @@ def handle(): class UpdateCmfVersion(CmfFailure): - def __init__(self, return_code=110): + def __init__(self, return_code=112): super().__init__(return_code) def handle(self): return "ERROR: You need to update cmf to the latest version. Unable to push metadata file." class MlmdAndTensorboardPushSuccess(CmfSuccess): - def __init__(self, tensorboard_file_name:str = "All", return_code=207): + def __init__(self, tensorboard_file_name:str = "All", return_code=206): self.tensorboard_file_name = tensorboard_file_name super().__init__(return_code) @@ -168,7 +168,7 @@ def handle(self): return f"tensorboard logs: file {self.tensorboard_file_push_message} pushed successfully" class MlmdAndTensorboardPushFailure(CmfFailure): - def __init__(self,tensorboard_file_name,response_text, return_code=111): + def __init__(self,tensorboard_file_name,response_text, return_code=113): self.tensorboard_file_name = tensorboard_file_name self.response_text = response_text super().__init__(return_code) @@ -178,28 +178,28 @@ def handle(self): class ArgumentNotProvided(CmfFailure): - def __init__(self, return_code=112): + def __init__(self, return_code=114): super().__init__(return_code) def handle(self): return "ERROR: Provide user, password and uri for neo4j initialization." class CmfInitFailed(CmfFailure): - def __init__(self, return_code=113): + def __init__(self, return_code=115): super().__init__(return_code) def handle(self): return "cmf init failed." class CmfInitComplete(CmfSuccess): - def __init__(self, return_code=208): + def __init__(self, return_code=207): super().__init__(return_code) def handle(self): return "cmf init complete." class CmfInitShow(CmfSuccess): - def __init__(self,result, attr_str, return_code=209): + def __init__(self,result, attr_str, return_code=208): self.result = result self.attr_str = attr_str super().__init__(return_code) @@ -208,14 +208,14 @@ def handle(self): return f"{self.result}\n{self.attr_str}" class CmfServerNotAvailable(CmfFailure): - def __init__(self, return_code=114): + def __init__(self, return_code=116): super().__init__(return_code) def handle(self): return "ERROR: cmf-server is not available." class InternalServerError(CmfFailure): - def __init__(self, return_code=115): + def __init__(self, return_code=117): super().__init__(return_code) def handle(self): @@ -229,7 +229,7 @@ def handle(self): return "ERROR: Unable to pull mlmd." class CurrentDirectoryNotfound(CmfFailure): - def __init__(self,current_dir, return_code=116): + def __init__(self,current_dir, return_code=118): self.current_dir = current_dir super().__init__(return_code) @@ -237,15 +237,22 @@ def handle(self): return f"{self.current_dir} doesn't exists." class FileNameNotfound(CmfFailure): - def __init__(self, return_code=116): + def __init__(self, return_code=119): super().__init__(return_code) def handle(self): return "Provide path with file name." class NoDataFoundOsdf(CmfFailure): - def __init__(self, return_code=117): + def __init__(self, return_code=120): super().__init__(return_code) def handle(self): return "No data received from the server." + +class InvalidTensorboardFilePath(CmfFailure): + def __init__(self, return_code=121): + super().__init__(return_code) + + def handle(self): + return "ERROR: Invalid data path. Provide valid file/folder path for tensorboard logs!!" diff --git a/cmflib/commands/metadata/push.py b/cmflib/commands/metadata/push.py index 64983bab..6a613265 100644 --- a/cmflib/commands/metadata/push.py +++ b/cmflib/commands/metadata/push.py @@ -24,7 +24,7 @@ from cmflib.server_interface import server_interface from cmflib.utils.cmf_config import CmfConfig from cmflib.cmf_exception_handling import MlmdAndTensorboardPushSuccess, MlmdAndTensorboardPushFailure, PipelineNameNotFound, MlmdFilePushedSuccess, ExecutionsAlreadyExists -from cmflib.cmf_exception_handling import FileNotFound, ExecutionIDNotFound, PipelineNameNotFound, MlmdFilePulledSuccess, ExecutionsAlreadyExists, UpdateCmfVersion, CmfServerNotAvailable, InternalServerError, CmfNotConfigured +from cmflib.cmf_exception_handling import FileNotFound, ExecutionIDNotFound, PipelineNameNotFound, MlmdFilePulledSuccess, ExecutionsAlreadyExists, UpdateCmfVersion, CmfServerNotAvailable, InternalServerError, CmfNotConfigured, InvalidTensorboardFilePath # This class pushes mlmd file to cmf-server class CmdMetadataPush(CmdBase): def run(self): @@ -89,51 +89,50 @@ def run(self): status_code = response.status_code if status_code == 200: output = "" + display_output = "" if response.json()['status']=="success": - output = "mlmd is successfully pushed." + display_output = "mlmd is successfully pushed." + output = MlmdFilePushedSuccess if response.json()["status"]=="exists": - output = "Executions already exists." + display_output = "Executions already exists." + output = ExecutionsAlreadyExists - if self.args.tensorboard: - print(output) + if not self.args.tensorboard: + return output + print(display_output) # /tensorboard api call is done only if mlmd push is successfully completed # tensorboard parameter is passed - print("......................................") - print("tensorboard logs upload started!!") - print("......................................") - - # check if the path provided is for a file - if os.path.isfile(self.args.tensorboard): - file_name = os.path.basename(self.args.tensorboard) - tresponse = server_interface.call_tensorboard(url, self.args.pipeline_name, file_name, self.args.tensorboard) - tstatus_code = tresponse.status_code - if tstatus_code == 200: - # give status code as success - return MlmdAndTensorboardPushSuccess(file_name) - else: - # give status code as failure - return MlmdAndTensorboardPushFailure(file_name,tresponse.text) - # If path provided is a directory - elif os.path.isdir(self.args.tensorboard): - # Recursively push all files and subdirectories - for root, dirs, files in os.walk(self.args.tensorboard): - for file_name in files: - file_path = os.path.join(root, file_name) - relative_path = os.path.relpath(file_path, self.args.tensorboard) - tresponse = server_interface.call_tensorboard(url, self.args.pipeline_name, relative_path, file_path) - if tresponse.status_code == 200: - print(f"tensorboard logs: File {file_name} uploaded successfully.") - else: - # give status as failure - return MlmdAndTensorboardPushFailure(file_name,tresponse.text) - return MlmdAndTensorboardPushSuccess + print("......................................") + print("tensorboard logs upload started!!") + print("......................................") + + # check if the path provided is for a file + if os.path.isfile(self.args.tensorboard): + file_name = os.path.basename(self.args.tensorboard) + tresponse = server_interface.call_tensorboard(url, self.args.pipeline_name, file_name, self.args.tensorboard) + tstatus_code = tresponse.status_code + if tstatus_code == 200: + # give status code as success + return MlmdAndTensorboardPushSuccess(file_name) else: - return "ERROR: Invalid data path. Provide valid file/folder path for tensorboard logs!!" + # give status code as failure + return MlmdAndTensorboardPushFailure(file_name,tresponse.text) + # If path provided is a directory + elif os.path.isdir(self.args.tensorboard): + # Recursively push all files and subdirectories + for root, dirs, files in os.walk(self.args.tensorboard): + for file_name in files: + file_path = os.path.join(root, file_name) + relative_path = os.path.relpath(file_path, self.args.tensorboard) + tresponse = server_interface.call_tensorboard(url, self.args.pipeline_name, relative_path, file_path) + if tresponse.status_code == 200: + print(f"tensorboard logs: File {file_name} uploaded successfully.") + else: + # give status as failure + return MlmdAndTensorboardPushFailure(file_name,tresponse.text) + return MlmdAndTensorboardPushSuccess else: - if response.json()['status']=="success": - return MlmdFilePushedSuccess - if response.json()["status"]=="exists": - return ExecutionsAlreadyExists + return InvalidTensorboardFilePath elif status_code==422 and response.json()["status"]=="version_update": raise UpdateCmfVersion elif status_code == 404: From 9d615554b41a460db2d1b6858756e6877158fcfd Mon Sep 17 00:00:00 2001 From: abhinav chobey Date: Sun, 8 Dec 2024 21:33:44 -0800 Subject: [PATCH 09/41] removed print statements, added error handling to osdf init --- cmflib/cmf_exception_handling.py | 9 ++++++++- cmflib/commands/artifact/pull.py | 8 -------- cmflib/commands/error_handling.py | 29 ----------------------------- cmflib/commands/init/amazonS3.py | 1 - cmflib/commands/init/osdfremote.py | 8 ++++---- cmflib/commands/metadata/pull.py | 8 -------- 6 files changed, 12 insertions(+), 51 deletions(-) delete mode 100644 cmflib/commands/error_handling.py diff --git a/cmflib/cmf_exception_handling.py b/cmflib/cmf_exception_handling.py index 14f94afe..55e2a361 100644 --- a/cmflib/cmf_exception_handling.py +++ b/cmflib/cmf_exception_handling.py @@ -1,4 +1,11 @@ -"""Exceptions raised by the CMF.""" +""" + Exceptions raised by the CMF. + CmfResponse includes two child classes + 1. CmfSuccess + 2. CmfFailure + On the basis of success and failure various child classes are created + +""" class CmfResponse(Exception): """Base class for all cmf exceptions.""" diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index 6dfdfb98..495ff99f 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -305,7 +305,6 @@ def run(self): return status else: for name, url in name_url_dict.items(): - print("1") if not isinstance(url, str): continue local_args = self.extract_repo_args("local", name, url, current_directory) @@ -316,21 +315,16 @@ def run(self): total_files_in_dir,count_files_success,return_code = local_class_obj.download_artifacts( dvc_config_op, current_directory, local_args[0], local_args[1] ) - # print(total_files_in_dir,count_files_success,return_code, stmt) total_files_count += total_files_in_dir files_download_completed += count_files_success - #print(total_files_in_dir,count_files_success,"total_files_in_dir,count_files_success") if return_code == 206 and not local_args[1].endswith(".dir") : files_download_completed += 1 files_downloaded = files_download_completed + count_files_success Files_failed_to_download = total_files_in_dir + total_files_count - files_download_completed - count_files_success if Files_failed_to_download == 0: - print("2") status = BatchDownloadSuccess(files_downloaded=files_downloaded) else: - print("3") status = BatchDownloadFailure(files_downloaded=files_downloaded, Files_failed_to_download= Files_failed_to_download) - print("4") return status elif dvc_config_op["core.remote"] == "ssh-storage": sshremote_class_obj = sshremote_artifacts.SSHremoteArtifacts() @@ -378,7 +372,6 @@ def run(self): ) total_files_count += total_files_in_dir files_download_completed += count_files_success - #print(total_files_in_dir,count_files_success,"total_files_in_dir,count_files_success") if return_code == 206 and not args[1].endswith(".dir") : files_download_completed += 1 files_downloaded = files_download_completed + count_files_success @@ -501,7 +494,6 @@ def run(self): ) total_files_count += total_files_in_dir files_download_completed += count_files_success - #print(total_files_in_dir,count_files_success,"total_files_in_dir,count_files_success") if return_code == 206 and not args[1].endswith(".dir") : files_download_completed += 1 files_downloaded = files_download_completed + count_files_success diff --git a/cmflib/commands/error_handling.py b/cmflib/commands/error_handling.py deleted file mode 100644 index 0bf085d2..00000000 --- a/cmflib/commands/error_handling.py +++ /dev/null @@ -1,29 +0,0 @@ -import sys -ERROR_CODES = { - 0: "Success", - 1: "Missing required argument", - 2: "File not found", - 3: "Invalid argument value", - 4: "Operation failed", - 5: "Permission denied", - 6: "No executions found.", - 7: "Pipeline_name doesnt exist", - 8: "MinioS3 server failed to start!!!", - 9: "Bucket doesn't exists", - 10:"object {temp_object_name} downloaded at {temp_download_loc}.", - 11:"object {object_name} downloaded at {download_loc}.", - 12: "Unknown error", -} - -def get_error_message(code): - return ERROR_CODES.get(code, ERROR_CODES[12]) - -def handle_error(return_code, **kwargs): - error_message = get_error_message(return_code) - if return_code==11: - temp_object_name=kwargs["temp_object_name"] - temp_download_loc=kwargs["temp_download_loc"] - error_message=f"object {temp_object_name} downloaded at {temp_download_loc}." - - print(f"Error: {error_message}") - sys.exit(return_code) \ No newline at end of file diff --git a/cmflib/commands/init/amazonS3.py b/cmflib/commands/init/amazonS3.py index bb7b1f9b..3d8edb3c 100644 --- a/cmflib/commands/init/amazonS3.py +++ b/cmflib/commands/init/amazonS3.py @@ -82,7 +82,6 @@ def run(self): if not output: raise CmfInitFailed print(output) - print("1") dvc_add_attribute(repo_type, "access_key_id", self.args.access_key_id) dvc_add_attribute(repo_type, "secret_access_key", self.args.secret_key) dvc_add_attribute(repo_type, "session_token", self.args.session_token) diff --git a/cmflib/commands/init/osdfremote.py b/cmflib/commands/init/osdfremote.py index c187d894..d7cbe607 100644 --- a/cmflib/commands/init/osdfremote.py +++ b/cmflib/commands/init/osdfremote.py @@ -18,7 +18,7 @@ #!/usr/bin/env python3 import argparse import os - +from cmflib.cmf_exception_handling import CmfInitComplete, CmfInitFailed, ArgumentNotProvided from cmflib.cli.command import CmdBase from cmflib.dvc_wrapper import ( git_quiet_init, @@ -64,7 +64,7 @@ def run(self): ): pass else: - return "ERROR: Provide user, password and uri for neo4j initialization." + raise ArgumentNotProvided output = is_git_repo() if not output: branch_name = "master" @@ -80,7 +80,7 @@ def run(self): dvc_quiet_init() output = dvc_add_remote_repo(repo_type, self.args.path) if not output: - return "cmf init failed." + raise CmfInitFailed print(output) #dvc_add_attribute(repo_type, "key_id", self.args.key_id) #dvc_add_attribute(repo_type, "key_path", self.args.key_path) @@ -104,7 +104,7 @@ def run(self): attr_dict["key_issuer"] = self.args.key_issuer CmfConfig.write_config(cmf_config, "osdf", attr_dict, True) - return "cmf init complete." + return CmfInitComplete def add_parser(subparsers, parent_parser): diff --git a/cmflib/commands/metadata/pull.py b/cmflib/commands/metadata/pull.py index aaee0959..5cfbe65c 100644 --- a/cmflib/commands/metadata/pull.py +++ b/cmflib/commands/metadata/pull.py @@ -87,14 +87,6 @@ def run(self): else: raise MlmdFilePulledFailure - - - # verifying status codes - # else: - # print("4") - # raise MlmdNotFoundOnServer - - def add_parser(subparsers, parent_parser): PULL_HELP = "Pulls mlmd from cmf-server to users's machine." From 6e44cc356859d162c567885cdde6b09893351076 Mon Sep 17 00:00:00 2001 From: AyeshaSanadi Date: Mon, 9 Dec 2024 18:57:37 +0530 Subject: [PATCH 10/41] Fixed error occured during testing --- cmflib/cmf_exception_handling.py | 8 +++++ cmflib/commands/artifact/push.py | 12 ++++++-- cmflib/commands/metadata/pull.py | 2 +- cmflib/commands/metadata/push.py | 36 +++++++++++------------ cmflib/storage_backends/osdf_artifacts.py | 4 +-- 5 files changed, 37 insertions(+), 25 deletions(-) diff --git a/cmflib/cmf_exception_handling.py b/cmflib/cmf_exception_handling.py index 55e2a361..3bd3eb64 100644 --- a/cmflib/cmf_exception_handling.py +++ b/cmflib/cmf_exception_handling.py @@ -263,3 +263,11 @@ def __init__(self, return_code=121): def handle(self): return "ERROR: Invalid data path. Provide valid file/folder path for tensorboard logs!!" + +class ArtifactPushSuccess(CmfSuccess): + def __init__(self, message, return_code=205): + self.message = message + super().__init__(return_code) + + def handle(self): + return self.message diff --git a/cmflib/commands/artifact/push.py b/cmflib/commands/artifact/push.py index 2c2cc09d..224a37fd 100644 --- a/cmflib/commands/artifact/push.py +++ b/cmflib/commands/artifact/push.py @@ -26,14 +26,20 @@ from cmflib.utils.dvc_config import DvcConfig from cmflib.dvc_wrapper import dvc_push from cmflib.dvc_wrapper import dvc_add_attribute +from cmflib.cli.utils import find_root from cmflib.utils.cmf_config import CmfConfig -from cmflib.cmf_exception_handling import PipelineNameNotFound, Minios3ServerInactive, FileNotFound, ExecutionsNotFound +from cmflib.cmf_exception_handling import PipelineNameNotFound, Minios3ServerInactive, FileNotFound, ExecutionsNotFound, CmfNotConfigured, ArtifactPushSuccess class CmdArtifactPush(CmdBase): def run(self): result = "" dvc_config_op = DvcConfig.get_dvc_config() cmf_config_file = os.environ.get("CONFIG_FILE", ".cmfconfig") + # find root_dir of .cmfconfig + output = find_root(cmf_config_file) + # in case, there is no .cmfconfig file + if output.find("'cmf' is not configured.") != -1: + raise CmfNotConfigured(output) cmf_config={} cmf_config=CmfConfig.read_config(cmf_config_file) out_msg = check_minio_server(dvc_config_op) @@ -67,7 +73,7 @@ def run(self): if not query.get_pipeline_id(pipeline_name) > 0: raise PipelineNameNotFound(pipeline_name) - stages = query.get_pipeline_stages(self.args.pipeline_name) + stages = query.get_pipeline_stages(pipeline_name) executions = [] identifiers = [] @@ -112,7 +118,7 @@ def run(self): pass #print("file_set = ", final_list) result = dvc_push(list(final_list)) - return result + return ArtifactPushSuccess(result) def add_parser(subparsers, parent_parser): HELP = "Push artifacts to the user configured artifact repo." diff --git a/cmflib/commands/metadata/pull.py b/cmflib/commands/metadata/pull.py index 5cfbe65c..654a5671 100644 --- a/cmflib/commands/metadata/pull.py +++ b/cmflib/commands/metadata/pull.py @@ -31,7 +31,7 @@ def run(self): # find root_dir of .cmfconfig output = find_root(cmfconfig) # in case, there is no .cmfconfig file - if output.find("'cmf' is not configured") != -1: + if output.find("'cmf' is not configured") != -1: raise CmfNotConfigured(output) config_file_path = os.path.join(output, cmfconfig) attr_dict = CmfConfig.read_config(config_file_path) diff --git a/cmflib/commands/metadata/push.py b/cmflib/commands/metadata/push.py index 6a613265..a0c74c64 100644 --- a/cmflib/commands/metadata/push.py +++ b/cmflib/commands/metadata/push.py @@ -28,6 +28,20 @@ # This class pushes mlmd file to cmf-server class CmdMetadataPush(CmdBase): def run(self): + # Get url from config + cmfconfig = os.environ.get("CONFIG_FILE",".cmfconfig") + + # find root_dir of .cmfconfig + output = find_root(cmfconfig) + + # in case, there is no .cmfconfig file + if output.find("'cmf' is not configured.") != -1: + raise CmfNotConfigured(output) + + config_file_path = os.path.join(output, cmfconfig) + attr_dict = CmfConfig.read_config(config_file_path) + url = attr_dict.get("cmf-server-ip", "http://127.0.0.1:80") + current_directory = os.getcwd() mlmd_file_name = "./mlmd" @@ -45,22 +59,6 @@ def run(self): execution_flag = 0 status_code = 0 - # Get url from config - cmfconfig = os.environ.get("CONFIG_FILE",".cmfconfig") - - # find root_dir of .cmfconfig - output = find_root(cmfconfig) - - # in case, there is no .cmfconfig file - if output.find("'cmf' is not configured") != -1: - raise CmfNotConfigured(output) - - config_file_path = os.path.join(output, cmfconfig) - attr_dict = CmfConfig.read_config(config_file_path) - url = attr_dict.get("cmf-server-ip", "http://127.0.0.1:80") - - - # Checks if pipeline name exists if self.args.pipeline_name in query.get_pipeline_names(): print("metadata push started") @@ -113,7 +111,7 @@ def run(self): tstatus_code = tresponse.status_code if tstatus_code == 200: # give status code as success - return MlmdAndTensorboardPushSuccess(file_name) + return MlmdAndTensorboardPushSuccess() else: # give status code as failure return MlmdAndTensorboardPushFailure(file_name,tresponse.text) @@ -130,9 +128,9 @@ def run(self): else: # give status as failure return MlmdAndTensorboardPushFailure(file_name,tresponse.text) - return MlmdAndTensorboardPushSuccess + return MlmdAndTensorboardPushSuccess() else: - return InvalidTensorboardFilePath + return InvalidTensorboardFilePath() elif status_code==422 and response.json()["status"]=="version_update": raise UpdateCmfVersion elif status_code == 404: diff --git a/cmflib/storage_backends/osdf_artifacts.py b/cmflib/storage_backends/osdf_artifacts.py index f3d9198d..b63f7810 100644 --- a/cmflib/storage_backends/osdf_artifacts.py +++ b/cmflib/storage_backends/osdf_artifacts.py @@ -18,7 +18,7 @@ import requests #import urllib3 #urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) -from cmflib.cmf_exception_handling import NoDataFoundosdf +from cmflib.cmf_exception_handling import NoDataFoundOsdf class OSDFremoteArtifacts: def download_artifacts( @@ -54,7 +54,7 @@ def download_artifacts( if response.status_code == 200 and response.content: data = response.content else: - raise NoDataFoundosdf + raise NoDataFoundOsdf except Exception as exception: print(exception) From 5825d91a2339b7c38a867266ac7f280730ac95bc Mon Sep 17 00:00:00 2001 From: abhinav chobey Date: Thu, 12 Dec 2024 00:38:32 -0800 Subject: [PATCH 11/41] update --- cmflib/cmf_exception_handling.py | 216 ++++++++++----------- cmflib/commands/artifact/list.py | 1 - cmflib/commands/artifact/pull.py | 6 +- cmflib/commands/artifact/push.py | 6 +- cmflib/commands/metadata/pull.py | 13 +- cmflib/commands/metadata/push.py | 10 +- cmflib/commands/metadata/push_old.py | 178 ----------------- cmflib/storage_backends/local_artifacts.py | 1 - server/app/get_data.py | 5 - 9 files changed, 122 insertions(+), 314 deletions(-) delete mode 100644 cmflib/commands/metadata/push_old.py diff --git a/cmflib/cmf_exception_handling.py b/cmflib/cmf_exception_handling.py index 3bd3eb64..8eb5d0a6 100644 --- a/cmflib/cmf_exception_handling.py +++ b/cmflib/cmf_exception_handling.py @@ -1,14 +1,14 @@ """ - Exceptions raised by the CMF. - CmfResponse includes two child classes + Response and Exceptions raised by the CMF. + CmfResponse includes two child classes, 1. CmfSuccess 2. CmfFailure - On the basis of success and failure various child classes are created + On the basis of success and failure various child classes are created. """ class CmfResponse(Exception): - """Base class for all cmf exceptions.""" + """Base class for all the cmf responses and exceptions.""" def __init__(self, return_code=None, status="failure", *args): self.return_code = return_code @@ -19,20 +19,97 @@ class CmfFailure(CmfResponse): def __init__(self, return_code=None, *args): super().__init__(return_code, status="failure", *args) - # Subclass for Success Cases class CmfSuccess(CmfResponse): def __init__(self, return_code=None, *args): super().__init__(return_code, status="success", *args) +'''CMF Success Class''' + +class ExecutionsAlreadyExists(CmfSuccess): + def __init__(self, return_code=201): + super().__init__(return_code) + + def handle(): + return "Executions already exists." + +class ObjectDownloadSuccess(CmfSuccess): + def __init__(self,object_name,download_loc, return_code=202): + self.object_name = object_name + self.download_loc = download_loc + super().__init__(return_code) + + def handle(self): + return f"SUCCESS: Object {self.object_name} downloaded at {self.download_loc}." + +class BatchDownloadSuccess(CmfSuccess): + def __init__(self,files_downloaded, return_code=203): + self.files_downloaded = files_downloaded + super().__init__(return_code) + + def handle(self): + return f"SUCCESS: Number of files downloaded = {self.files_downloaded }." + +class MlmdFilePullSuccess(CmfSuccess): + def __init__(self,full_path_to_dump, return_code=204): + self.full_path_to_dump = full_path_to_dump + super().__init__(return_code) + + def handle(self): + return f"SUCCESS: {self.full_path_to_dump} is successfully pulled." + +class MlmdFilePushSuccess(CmfSuccess): + def __init__(self, file_name, return_code=205): + self.file_name = file_name + super().__init__(return_code) + + def handle(self): + return f"SUCCESS: {self.file_name} is successfully pushed." + +class MlmdAndTensorboardPushSuccess(CmfSuccess): + def __init__(self, tensorboard_file_name:str = "All", return_code=206): + self.tensorboard_file_name = tensorboard_file_name + super().__init__(return_code) + + def handle(self): + if self.tensorboard_file_name == "All": + return f"tensorboard logs: files pushed successfully" + return f"tensorboard logs: file {self.tensorboard_file_name} pushed successfully." + +class CmfInitComplete(CmfSuccess): + def __init__(self, return_code=207): + super().__init__(return_code) + + def handle(self): + return "SUCCESS: cmf init complete." -class PipelineNameNotFound(CmfFailure): +class CmfInitShow(CmfSuccess): + def __init__(self,result, attr_str, return_code=208): + self.result = result + self.attr_str = attr_str + super().__init__(return_code) + + def handle(self): + return f"{self.result}\n{self.attr_str}" + +class ArtifactPushSuccess(CmfSuccess): + def __init__(self, message, return_code=209): + self.message = message + super().__init__(return_code) + + def handle(self): + return self.message + + +''' CMF FAILURE CLASSES''' + +class PipelineNotFound(CmfFailure): def __init__(self,pipeline_name,return_code=101): self.pipeline_name = pipeline_name super().__init__(return_code) def handle(self): - return f"Pipeline_name {self.pipeline_name} doesn't exist" + return f"ERROR: Pipeline_name {self.pipeline_name} doesn't exist." class FileNotFound(CmfFailure): @@ -41,30 +118,21 @@ def __init__(self,file_name,return_code=102): super().__init__(return_code) def handle(self): - return f"File Not Found: {self.file_name}" + return f"ERROR: File {self.file_name} Not Found." class BucketNotFound(CmfFailure): def __init__(self,return_code=103): super().__init__(return_code) def handle(self): - return f"Bucket doesnt exist" - -class ExecutionsAlreadyExists(CmfSuccess): - def __init__(self, return_code=201): - super().__init__(return_code) - - @staticmethod - def handle(): - return "Executions already exists." - + return f"ERROR: Bucket doesn't exist." class ExecutionsNotFound(CmfFailure): def __init__(self, return_code=104): super().__init__(return_code) def handle(self): - return f"Executions not found" + return f"ERROR: Executions not found." class ExecutionIDNotFound(CmfFailure): def __init__(self,exec_id, return_code=105): @@ -72,7 +140,7 @@ def __init__(self,exec_id, return_code=105): super().__init__(return_code) def handle(self): - return f"Error: Execution id {self.exec_id} is not present in mlmd." + return f"ERROR: Execution id {self.exec_id} is not present in mlmd." class ArtifactNotFound(CmfFailure): def __init__(self,artifact_name, return_code=106): @@ -80,17 +148,7 @@ def __init__(self,artifact_name, return_code=106): super().__init__(return_code) def handle(self): - return f"Artifact {self.artifact_name} not found" - - -class ObjectDownloadSuccess(CmfSuccess): - def __init__(self,object_name,download_loc, return_code=202): - self.object_name = object_name - self.download_loc = download_loc - super().__init__(return_code) - - def handle(self): - return f"object {self.object_name} downloaded at {self.download_loc}." + return f"ERROR: Artifact {self.artifact_name} not found." class ObjectDownloadFailure(CmfFailure): def __init__(self,object_name, return_code=107): @@ -98,7 +156,7 @@ def __init__(self,object_name, return_code=107): super().__init__(return_code) def handle(self): - return f"object {self.object_name} is not downloaded." + return f"Object {self.object_name} is not downloaded." class BatchDownloadFailure(CmfFailure): def __init__(self,files_downloaded, Files_failed_to_download, return_code=108): @@ -107,22 +165,14 @@ def __init__(self,files_downloaded, Files_failed_to_download, return_code=108): super().__init__(return_code) def handle(self): - return f"Number of files downloaded = {self.files_downloaded }. Files failed to download = {self.Files_failed_to_download}" - -class BatchDownloadSuccess(CmfSuccess): - def __init__(self,files_downloaded, return_code=203): - self.files_downloaded = files_downloaded - super().__init__(return_code) - - def handle(self): - return f"Number of files downloaded = {self.files_downloaded }." + return f"Number of files downloaded = {self.files_downloaded }. Files failed to download = {self.Files_failed_to_download}." class Minios3ServerInactive(CmfFailure): def __init__(self,return_code=109): super().__init__(return_code) def handle(self): - return f"MinioS3 server failed to start!!!" + return f"ERROR: MinioS3 server is not running!!!" class CmfNotConfigured(CmfFailure): def __init__(self,message, return_code=110): @@ -137,26 +187,8 @@ def __init__(self, return_code=111): super().__init__(return_code) def handle(self): - return "mlmd file not available on cmf-server." + return "ERROR: Metadata file not available on cmf-server." -class MlmdFilePulledSuccess(CmfSuccess): - def __init__(self,full_path_to_dump, return_code=204): - self.full_path_to_dump = full_path_to_dump - super().__init__(return_code) - - def handle(self): - return f"SUCCESS: {self.full_path_to_dump} is successfully pulled." - -class MlmdFilePushedSuccess(CmfSuccess): - def __init__(self, return_code=205): - super().__init__(return_code) - - @staticmethod - def handle(): - return f"mlmd is successfully pushed." - - - class UpdateCmfVersion(CmfFailure): def __init__(self, return_code=112): super().__init__(return_code) @@ -164,16 +196,6 @@ def __init__(self, return_code=112): def handle(self): return "ERROR: You need to update cmf to the latest version. Unable to push metadata file." -class MlmdAndTensorboardPushSuccess(CmfSuccess): - def __init__(self, tensorboard_file_name:str = "All", return_code=206): - self.tensorboard_file_name = tensorboard_file_name - super().__init__(return_code) - - def handle(self): - if self.tensorboard_file_name == "All": - return f"tensorboard logs: files pushed successfully" - return f"tensorboard logs: file {self.tensorboard_file_push_message} pushed successfully" - class MlmdAndTensorboardPushFailure(CmfFailure): def __init__(self,tensorboard_file_name,response_text, return_code=113): self.tensorboard_file_name = tensorboard_file_name @@ -181,8 +203,7 @@ def __init__(self,tensorboard_file_name,response_text, return_code=113): super().__init__(return_code) def handle(self): - return f"ERROR: Failed to upload file {self.file_name}. Server response: {self.response_text}" - + return f"ERROR: Failed to upload file {self.tensorboard_file_name}. Server response: {self.response_text}." class ArgumentNotProvided(CmfFailure): def __init__(self, return_code=114): @@ -196,23 +217,7 @@ def __init__(self, return_code=115): super().__init__(return_code) def handle(self): - return "cmf init failed." - -class CmfInitComplete(CmfSuccess): - def __init__(self, return_code=207): - super().__init__(return_code) - - def handle(self): - return "cmf init complete." - -class CmfInitShow(CmfSuccess): - def __init__(self,result, attr_str, return_code=208): - self.result = result - self.attr_str = attr_str - super().__init__(return_code) - - def handle(self): - return f"{self.result}\n{self.attr_str}" + return "ERROR: cmf init failed." class CmfServerNotAvailable(CmfFailure): def __init__(self, return_code=116): @@ -226,48 +231,41 @@ def __init__(self, return_code=117): super().__init__(return_code) def handle(self): - return "ERROR: Internal server error." + return "cmf-server error: Please restart the cmf-server." -class MlmdFilePulledFailure(CmfFailure): - def __init__(self, return_code=204): +class MlmdFilePullFailure(CmfFailure): + def __init__(self, return_code=118): super().__init__(return_code) def handle(self): return "ERROR: Unable to pull mlmd." -class CurrentDirectoryNotfound(CmfFailure): - def __init__(self,current_dir, return_code=118): +class DirectoryNotfound(CmfFailure): + def __init__(self,current_dir, return_code=119): self.current_dir = current_dir super().__init__(return_code) def handle(self): - return f"{self.current_dir} doesn't exists." + return f"ERROR: {self.current_dir} doesn't exists." class FileNameNotfound(CmfFailure): - def __init__(self, return_code=119): + def __init__(self, return_code=120): super().__init__(return_code) def handle(self): - return "Provide path with file name." + return "ERROR: Provide path with file name." class NoDataFoundOsdf(CmfFailure): - def __init__(self, return_code=120): + def __init__(self, return_code=121): super().__init__(return_code) def handle(self): - return "No data received from the server." + return "ERROR: No data received from the server." class InvalidTensorboardFilePath(CmfFailure): - def __init__(self, return_code=121): + def __init__(self, return_code=122): super().__init__(return_code) def handle(self): - return "ERROR: Invalid data path. Provide valid file/folder path for tensorboard logs!!" - -class ArtifactPushSuccess(CmfSuccess): - def __init__(self, message, return_code=205): - self.message = message - super().__init__(return_code) + return "ERROR: Invalid tensorboard logs path. Provide valid file/folder path for tensorboard logs!!" - def handle(self): - return self.message diff --git a/cmflib/commands/artifact/list.py b/cmflib/commands/artifact/list.py index 83fa37a7..69c46c39 100644 --- a/cmflib/commands/artifact/list.py +++ b/cmflib/commands/artifact/list.py @@ -147,7 +147,6 @@ def run(self): mlmd_file_name = self.args.file_name[0].strip() if mlmd_file_name == "mlmd": mlmd_file_name = "./mlmd" - if not os.path.exists(mlmd_file_name): raise FileNotFound(mlmd_file_name) diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index 495ff99f..2b58bd60 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -28,7 +28,7 @@ ) from cmflib.cli.command import CmdBase from cmflib.utils.dvc_config import DvcConfig -from cmflib.cmf_exception_handling import PipelineNameNotFound, Minios3ServerInactive, FileNotFound, ExecutionsNotFound, ArtifactNotFound, BatchDownloadFailure, BatchDownloadSuccess,ObjectDownloadFailure, ObjectDownloadSuccess +from cmflib.cmf_exception_handling import PipelineNotFound, FileNotFound, ExecutionsNotFound, ArtifactNotFound, BatchDownloadFailure, BatchDownloadSuccess,ObjectDownloadFailure, ObjectDownloadSuccess from cmflib.cli.utils import check_minio_server class CmdArtifactPull(CmdBase): @@ -175,10 +175,10 @@ def run(self): if self.args.artifact_name == "": raise ArtifactNotFound("") if self.args.pipeline_name == "": #checking if pipeline_name is not "" - raise PipelineNameNotFound(self.args.pipeline_name) + raise PipelineNotFound(self.args.pipeline_name) query = cmfquery.CmfQuery(mlmd_file_name) if not query.get_pipeline_id(self.args.pipeline_name) > 0: #checking if pipeline name exists in mlmd - raise PipelineNameNotFound(self.args.pipeline_name) + raise PipelineNotFound(self.args.pipeline_name) # dvc_config_op = DvcConfig.get_dvc_config() # cmf_config_file = os.environ.get("CONFIG_FILE", ".cmfconfig") diff --git a/cmflib/commands/artifact/push.py b/cmflib/commands/artifact/push.py index 224a37fd..d91bad2a 100644 --- a/cmflib/commands/artifact/push.py +++ b/cmflib/commands/artifact/push.py @@ -28,7 +28,7 @@ from cmflib.dvc_wrapper import dvc_add_attribute from cmflib.cli.utils import find_root from cmflib.utils.cmf_config import CmfConfig -from cmflib.cmf_exception_handling import PipelineNameNotFound, Minios3ServerInactive, FileNotFound, ExecutionsNotFound, CmfNotConfigured, ArtifactPushSuccess +from cmflib.cmf_exception_handling import PipelineNotFound, Minios3ServerInactive, FileNotFound, ExecutionsNotFound, CmfNotConfigured, ArtifactPushSuccess class CmdArtifactPush(CmdBase): def run(self): @@ -55,14 +55,12 @@ def run(self): #print(result) return result - current_directory = os.getcwd() # Default path of mlmd file mlmd_file_name = "./mlmd" if self.args.file_name: mlmd_file_name = self.args.file_name if mlmd_file_name == "mlmd": mlmd_file_name = "./mlmd" - current_directory = os.path.dirname(mlmd_file_name) if not os.path.exists(mlmd_file_name): raise FileNotFound(mlmd_file_name) # creating cmfquery object @@ -71,7 +69,7 @@ def run(self): # Put a check to see whether pipline exists or not pipeline_name = self.args.pipeline_name if not query.get_pipeline_id(pipeline_name) > 0: - raise PipelineNameNotFound(pipeline_name) + raise PipelineNotFound(pipeline_name) stages = query.get_pipeline_stages(pipeline_name) executions = [] diff --git a/cmflib/commands/metadata/pull.py b/cmflib/commands/metadata/pull.py index 654a5671..24209519 100644 --- a/cmflib/commands/metadata/pull.py +++ b/cmflib/commands/metadata/pull.py @@ -20,10 +20,9 @@ from cmflib import cmf_merger from cmflib.cli.command import CmdBase from cmflib.cli.utils import find_root -from cmflib import cmfquery from cmflib.server_interface import server_interface from cmflib.utils.cmf_config import CmfConfig -from cmflib.cmf_exception_handling import PipelineNameNotFound, CmfNotConfigured, FileNotFound, ExecutionIDNotFound, MlmdNotFoundOnServer, MlmdFilePulledSuccess, CmfServerNotAvailable, InternalServerError, MlmdFilePulledFailure, CurrentDirectoryNotfound, FileNameNotfound +from cmflib.cmf_exception_handling import PipelineNotFound, CmfNotConfigured, ExecutionIDNotFound, MlmdNotFoundOnServer, MlmdFilePullSuccess, CmfServerNotAvailable, InternalServerError, MlmdFilePullFailure, DirectoryNotfound, FileNameNotfound # This class pulls mlmd file from cmf-server class CmdMetadataPull(CmdBase): def run(self): @@ -49,7 +48,7 @@ def run(self): if os.path.exists(current_directory): full_path_to_dump = self.args.file_name else: - raise CurrentDirectoryNotfound(current_dir= current_directory) + raise DirectoryNotfound(current_dir= current_directory) else: raise FileNameNotfound else: @@ -62,7 +61,7 @@ def run(self): status = output.status_code # checks If given pipeline does not exists/ elif pull mlmd file/ else mlmd file is not available if output.content.decode() == None: - raise PipelineNameNotFound(self.args.pipeline_name) + raise PipelineNotFound(self.args.pipeline_name) elif output.content.decode() == "no_exec_id": raise ExecutionIDNotFound(exec_id) @@ -72,20 +71,20 @@ def run(self): cmf_merger.parse_json_to_mlmd( output.content, full_path_to_dump, cmd, None ) # converts mlmd json data to mlmd file - pull_status = MlmdFilePulledSuccess(full_path_to_dump) + pull_status = MlmdFilePullSuccess(full_path_to_dump) return pull_status except Exception as e: return e elif status == 413: raise MlmdNotFoundOnServer elif status == 406: - raise PipelineNameNotFound(self.args.pipeline_name) + raise PipelineNotFound(self.args.pipeline_name) elif status == 404: raise CmfServerNotAvailable elif status == 500: raise InternalServerError else: - raise MlmdFilePulledFailure + raise MlmdFilePullFailure def add_parser(subparsers, parent_parser): PULL_HELP = "Pulls mlmd from cmf-server to users's machine." diff --git a/cmflib/commands/metadata/push.py b/cmflib/commands/metadata/push.py index a0c74c64..00e4d250 100644 --- a/cmflib/commands/metadata/push.py +++ b/cmflib/commands/metadata/push.py @@ -23,8 +23,8 @@ from cmflib.cli.utils import find_root from cmflib.server_interface import server_interface from cmflib.utils.cmf_config import CmfConfig -from cmflib.cmf_exception_handling import MlmdAndTensorboardPushSuccess, MlmdAndTensorboardPushFailure, PipelineNameNotFound, MlmdFilePushedSuccess, ExecutionsAlreadyExists -from cmflib.cmf_exception_handling import FileNotFound, ExecutionIDNotFound, PipelineNameNotFound, MlmdFilePulledSuccess, ExecutionsAlreadyExists, UpdateCmfVersion, CmfServerNotAvailable, InternalServerError, CmfNotConfigured, InvalidTensorboardFilePath +from cmflib.cmf_exception_handling import MlmdAndTensorboardPushSuccess, MlmdAndTensorboardPushFailure, MlmdFilePushedSuccess, ExecutionsAlreadyExists +from cmflib.cmf_exception_handling import FileNotFound, ExecutionIDNotFound, PipelineNotFound, ExecutionsAlreadyExists, UpdateCmfVersion, CmfServerNotAvailable, InternalServerError, CmfNotConfigured, InvalidTensorboardFilePath # This class pushes mlmd file to cmf-server class CmdMetadataPush(CmdBase): def run(self): @@ -42,13 +42,11 @@ def run(self): attr_dict = CmfConfig.read_config(config_file_path) url = attr_dict.get("cmf-server-ip", "http://127.0.0.1:80") - current_directory = os.getcwd() mlmd_file_name = "./mlmd" # checks if mlmd filepath is given if self.args.file_name: mlmd_file_name = self.args.file_name - current_directory = os.path.dirname(self.args.file_name) # checks if mlmd file is present in current directory or given directory if not os.path.exists(mlmd_file_name): @@ -90,7 +88,7 @@ def run(self): display_output = "" if response.json()['status']=="success": display_output = "mlmd is successfully pushed." - output = MlmdFilePushedSuccess + output = MlmdFilePushedSuccess(mlmd_file_name) if response.json()["status"]=="exists": display_output = "Executions already exists." output = ExecutionsAlreadyExists @@ -140,7 +138,7 @@ def run(self): else: return "ERROR: Status Code = {status_code}. Unable to push mlmd." else: - raise PipelineNameNotFound(self.args.pipeline_name) + raise PipelineNotFound(self.args.pipeline_name) def add_parser(subparsers, parent_parser): diff --git a/cmflib/commands/metadata/push_old.py b/cmflib/commands/metadata/push_old.py deleted file mode 100644 index 6498b571..00000000 --- a/cmflib/commands/metadata/push_old.py +++ /dev/null @@ -1,178 +0,0 @@ -### -# Copyright (2023) Hewlett Packard Enterprise Development LP -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# You may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -### - -#!/usr/bin/env python3 -import argparse -import os -import json -from cmflib import cmfquery -from cmflib.cli.command import CmdBase -from cmflib.cli.utils import find_root -from cmflib.server_interface import server_interface -from cmflib.utils.cmf_config import CmfConfig -from cmflib.cmf_exception_handling import FileNotFound, ExecutionIDNotFound, PipelineNameNotFound, MlmdFilePulledSuccess, ExecutionsAlreadyExists, UpdateCmfVersion -# This class pushes mlmd file to cmf-server -class CmdMetadataPush(CmdBase): - def run(self): - current_directory = os.getcwd() - mlmd_file_name = "./mlmd" - - # checks if mlmd filepath is given - if self.args.file_name: - mlmd_file_name = self.args.file_name - current_directory = os.path.dirname(self.args.file_name) - - # checks if mlmd file is present in current directory or given directory - if not os.path.exists(mlmd_file_name): - raise FileNotFound(mlmd_file_name) - - query = cmfquery.CmfQuery(mlmd_file_name) - # print(json.dumps(json.loads(json_payload), indent=4, sort_keys=True)) - execution_flag = 0 - status_code = 0 - - # Get url from config - cmfconfig = os.environ.get("CONFIG_FILE",".cmfconfig") - - # find root_dir of .cmfconfig - output = find_root(cmfconfig) - - # in case, there is no .cmfconfig file - if output.find("'cmf' is not configured") != -1: - return output - - config_file_path = os.path.join(output, cmfconfig) - attr_dict = CmfConfig.read_config(config_file_path) - url = attr_dict.get("cmf-server-ip", "http://127.0.0.1:80") - - print("metadata push started") - print("........................................") - - # Checks if pipeline name exists - if self.args.pipeline_name in query.get_pipeline_names(): - # converts mlmd file to json format - json_payload = query.dumptojson(self.args.pipeline_name, None) - # checks if execution_id is given by user - if self.args.execution: - exec_id = self.args.execution - mlmd_data = json.loads(json_payload)["Pipeline"] - # checks if given execution_id present in mlmd - for i in mlmd_data[0]["stages"]: - for j in i["executions"]: - if j["id"] == int(exec_id): - execution_flag = 1 - # calling mlmd_push api to push mlmd file to cmf-server - response = server_interface.call_mlmd_push( - json_payload, url, exec_id, self.args.pipeline_name - ) - break - if execution_flag == 0: - raise ExecutionIDNotFound(exec_id) - else: - exec_id = None - response = server_interface.call_mlmd_push(json_payload, url, exec_id, self.args.pipeline_name) - status_code = response.status_code - - if status_code == 200 and response.json()['status']=="success": - pass - #return MlmdFilePulledSuccess(self.args.filename).handle() - elif status_code==200 and response.json()["status"]=="exists": - raise ExecutionsAlreadyExists - elif status_code==422 and response.json()["status"]=="version_update": - raise UpdateCmfVersion - elif status_code == 404: - return "ERROR: cmf-server is not available." - elif status_code == 500: - return "ERROR: Internal server error." - else: - return "ERROR: Status Code = {status_code}. Unable to push mlmd." - - if self.args.tensorboard: - # /tensorboard api call is done only if mlmd push is successfully completed - # tensorboard parameter is passed - print("......................................") - print("tensorboard logs upload started!!") - print("......................................") - - # check if the path provided is for a file - if os.path.isfile(self.args.tensorboard): - file_name = os.path.basename(self.args.tensorboard) - tresponse = server_interface.call_tensorboard(url, self.args.pipeline_name, file_name, self.args.tensorboard) - tstatus_code = tresponse.status_code - if tstatus_code == 200: - print("tensorboard logs: file {file_name} pushed successfully") - else: - return "ERROR: Failed to upload file {file_name}. Server response: {response.text}" - # If path provided is a directory - elif os.path.isdir(self.args.tensorboard): - # Recursively push all files and subdirectories - for root, dirs, files in os.walk(self.args.tensorboard): - for file_name in files: - file_path = os.path.join(root, file_name) - relative_path = os.path.relpath(file_path, self.args.tensorboard) - tresponse = server_interface.call_tensorboard(url, self.args.pipeline_name, relative_path, file_path) - if tresponse.status_code == 200: - print(f"tensorboard logs: File {file_name} uploaded successfully.") - else: - return f"ERROR: Failed to upload file {file_name}. Server response: {tresponse.text}" - return f"tensorboard logs: {self.args.tensorboard} uploaded successfully!!" - else: - return "ERROR: Invalid data path. Provide valid file/folder path for tensorboard logs!!" - else: - return MlmdFilePulledSuccess(self.args.filename).handle() - else: - raise PipelineNameNotFound(self.args.pipeline_name) - - -def add_parser(subparsers, parent_parser): - PUSH_HELP = "Push user-generated mlmd to server to create one single mlmd file for all the pipelines." - - parser = subparsers.add_parser( - "push", - parents=[parent_parser], - description="Push user's mlmd to cmf-server.", - help=PUSH_HELP, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - required_arguments = parser.add_argument_group("required arguments") - - required_arguments.add_argument( - "-p", - "--pipeline_name", - required=True, - help="Specify Pipeline name.", - metavar="", - ) - - parser.add_argument( - "-f", "--file_name", help="Specify mlmd file name.", metavar="" - ) - - parser.add_argument( - "-e", - "--execution", - help="Specify Execution id.", - metavar="", - ) - - parser.add_argument( - "-t", - "--tensorboard", - help="Specify path to tensorboard logs for the pipeline.", - metavar="" - ) - - parser.set_defaults(func=CmdMetadataPush) diff --git a/cmflib/storage_backends/local_artifacts.py b/cmflib/storage_backends/local_artifacts.py index 2fdb145b..f4146361 100644 --- a/cmflib/storage_backends/local_artifacts.py +++ b/cmflib/storage_backends/local_artifacts.py @@ -16,7 +16,6 @@ import os from dvc.api import DVCFileSystem -from cmflib.cmf_exception_handling import ObjectDownloadSuccess, ObjectDownloadFailure class LocalArtifacts: def download_artifacts( diff --git a/server/app/get_data.py b/server/app/get_data.py index d003b379..20e7d649 100644 --- a/server/app/get_data.py +++ b/server/app/get_data.py @@ -249,10 +249,8 @@ def create_unique_executions(server_store_path, req_info) -> str: # mlmd push is failed here status="version_update" return status - print(executions_client, executions_server,"comparision") if executions_server != []: list_executions_exists = list(set(executions_client).intersection(set(executions_server))) - print(list_executions_exists,"list_executions_exists") for i in mlmd_data["Pipeline"]: for stage in i['stages']: for cmf_exec in stage['executions'][:]: @@ -268,18 +266,15 @@ def create_unique_executions(server_store_path, req_info) -> str: if len(i['stages']) == 0 : status="exists" else: - print(mlmd_data,"mlmd_data") cmf_merger.parse_json_to_mlmd( json.dumps(mlmd_data), "/cmf-server/data/mlmd", "push", req_info["id"] ) - print("inside success condition") status='success' return status def get_mlmd_from_server(server_store_path: str, pipeline_name: str, exec_id: str): - print("get_mlmd_from_server") query = cmfquery.CmfQuery(server_store_path) json_payload = None df = pd.DataFrame() From 1bf5865ddf7a6be88927570a47ac9b7610a601a6 Mon Sep 17 00:00:00 2001 From: abhinav chobey Date: Thu, 12 Dec 2024 21:27:47 -0800 Subject: [PATCH 12/41] update --- cmflib/cmf_exception_handling.py | 59 ++++++++++++++-------- cmflib/commands/artifact/list.py | 6 ++- cmflib/commands/artifact/pull.py | 9 +--- cmflib/commands/artifact/push.py | 4 +- cmflib/commands/init/amazonS3.py | 4 +- cmflib/commands/init/local.py | 4 +- cmflib/commands/init/minioS3.py | 4 +- cmflib/commands/init/osdfremote.py | 4 +- cmflib/commands/init/sshremote.py | 4 +- cmflib/commands/metadata/push.py | 17 ++++--- cmflib/storage_backends/minio_artifacts.py | 2 +- 11 files changed, 67 insertions(+), 50 deletions(-) diff --git a/cmflib/cmf_exception_handling.py b/cmflib/cmf_exception_handling.py index 8eb5d0a6..9ca2a9a0 100644 --- a/cmflib/cmf_exception_handling.py +++ b/cmflib/cmf_exception_handling.py @@ -1,14 +1,31 @@ -""" - Response and Exceptions raised by the CMF. - CmfResponse includes two child classes, - 1. CmfSuccess - 2. CmfFailure - On the basis of success and failure various child classes are created. - -""" +### +# Copyright (2022) Hewlett Packard Enterprise Development LP +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +### + +#!/usr/bin/env python3 class CmfResponse(Exception): - """Base class for all the cmf responses and exceptions.""" + """ + Response and Exceptions raised by the CMF. + CmfResponse includes two child classes, + 1. CmfSuccess + 2. CmfFailure + On the basis of success and failure various child classes are created. + + Base class for all the cmf responses and exceptions. + """ def __init__(self, return_code=None, status="failure", *args): self.return_code = return_code @@ -31,7 +48,7 @@ def __init__(self, return_code=201): super().__init__(return_code) def handle(): - return "Executions already exists." + return "INFO: Executions already exists." class ObjectDownloadSuccess(CmfSuccess): def __init__(self,object_name,download_loc, return_code=202): @@ -66,7 +83,7 @@ def __init__(self, file_name, return_code=205): def handle(self): return f"SUCCESS: {self.file_name} is successfully pushed." -class MlmdAndTensorboardPushSuccess(CmfSuccess): +class TensorboardPushSuccess(CmfSuccess): def __init__(self, tensorboard_file_name:str = "All", return_code=206): self.tensorboard_file_name = tensorboard_file_name super().__init__(return_code) @@ -109,23 +126,25 @@ def __init__(self,pipeline_name,return_code=101): super().__init__(return_code) def handle(self): - return f"ERROR: Pipeline_name {self.pipeline_name} doesn't exist." + return f"ERROR: Pipeline name {self.pipeline_name} doesn't exist." class FileNotFound(CmfFailure): - def __init__(self,file_name,return_code=102): + def __init__(self,file_name, directory, return_code=102): + self.directory = directory self.file_name =file_name super().__init__(return_code) def handle(self): - return f"ERROR: File {self.file_name} Not Found." + return f"ERROR: File {self.file_name} doesn't exists in {self.directory} directory." class BucketNotFound(CmfFailure): - def __init__(self,return_code=103): + def __init__(self,bucket_name, return_code=103): + self.bucket_name = bucket_name super().__init__(return_code) def handle(self): - return f"ERROR: Bucket doesn't exist." + return f"ERROR: Bucket {self.bucket_name} doesn't exist." class ExecutionsNotFound(CmfFailure): def __init__(self, return_code=104): @@ -196,7 +215,7 @@ def __init__(self, return_code=112): def handle(self): return "ERROR: You need to update cmf to the latest version. Unable to push metadata file." -class MlmdAndTensorboardPushFailure(CmfFailure): +class TensorboardPushFailure(CmfFailure): def __init__(self,tensorboard_file_name,response_text, return_code=113): self.tensorboard_file_name = tensorboard_file_name self.response_text = response_text @@ -205,7 +224,7 @@ def __init__(self,tensorboard_file_name,response_text, return_code=113): def handle(self): return f"ERROR: Failed to upload file {self.tensorboard_file_name}. Server response: {self.response_text}." -class ArgumentNotProvided(CmfFailure): +class Neo4jArgumentNotProvided(CmfFailure): def __init__(self, return_code=114): super().__init__(return_code) @@ -231,14 +250,14 @@ def __init__(self, return_code=117): super().__init__(return_code) def handle(self): - return "cmf-server error: Please restart the cmf-server." + return "cmf-server error: The server encountered an unexpected error." class MlmdFilePullFailure(CmfFailure): def __init__(self, return_code=118): super().__init__(return_code) def handle(self): - return "ERROR: Unable to pull mlmd." + return "ERROR: Unable to pull metadata file." class DirectoryNotfound(CmfFailure): def __init__(self,current_dir, return_code=119): diff --git a/cmflib/commands/artifact/list.py b/cmflib/commands/artifact/list.py index 69c46c39..2cd64f48 100644 --- a/cmflib/commands/artifact/list.py +++ b/cmflib/commands/artifact/list.py @@ -137,6 +137,7 @@ def run(self): # default path for mlmd file name mlmd_file_name = "./mlmd" + current_directory = os.getcwd() if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. elif len(self.args.file_name) > 1: # If the user provided more than one file name. @@ -145,10 +146,11 @@ def run(self): return "Error: Missing File name" else: mlmd_file_name = self.args.file_name[0].strip() + current_directory = os.path.dirname(self.args.file_name) if mlmd_file_name == "mlmd": mlmd_file_name = "./mlmd" if not os.path.exists(mlmd_file_name): - raise FileNotFound(mlmd_file_name) + raise FileNotFound(mlmd_file_name, current_directory) # Creating cmfquery object. query = cmfquery.CmfQuery(mlmd_file_name) @@ -164,7 +166,7 @@ def run(self): df = query.get_all_artifacts_by_context(pipeline_name) if df.empty: - raise PipelineNotFound + raise PipelineNotFound(pipeline_name) else: if not self.args.artifact_name: # If self.args.artifact_name is None or an empty list ([]). pass diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index 2b58bd60..55613709 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -171,7 +171,7 @@ def run(self): mlmd_file_name = "./mlmd" current_directory = os.path.dirname(mlmd_file_name) if not os.path.exists(mlmd_file_name): #checking if MLMD files exists - raise FileNotFound(mlmd_file_name) + raise FileNotFound(mlmd_file_name, current_directory) if self.args.artifact_name == "": raise ArtifactNotFound("") if self.args.pipeline_name == "": #checking if pipeline_name is not "" @@ -180,13 +180,6 @@ def run(self): if not query.get_pipeline_id(self.args.pipeline_name) > 0: #checking if pipeline name exists in mlmd raise PipelineNotFound(self.args.pipeline_name) - # dvc_config_op = DvcConfig.get_dvc_config() - # cmf_config_file = os.environ.get("CONFIG_FILE", ".cmfconfig") - # cmf_config={} - # cmf_config=CmfConfig.read_config(cmf_config_file) - # out_msg = check_minio_server(dvc_config_op) - # if dvc_config_op["core.remote"] == "minio" and out_msg != "SUCCESS": #checking if minios3 server is active - # raise Minios3ServerInactive() # getting all pipeline stages[i.e Prepare, Featurize, Train and Evaluate] stages = query.get_pipeline_stages(self.args.pipeline_name) executions = [] diff --git a/cmflib/commands/artifact/push.py b/cmflib/commands/artifact/push.py index d91bad2a..f75ce566 100644 --- a/cmflib/commands/artifact/push.py +++ b/cmflib/commands/artifact/push.py @@ -57,12 +57,14 @@ def run(self): # Default path of mlmd file mlmd_file_name = "./mlmd" + current_directory = os.getcwd() if self.args.file_name: mlmd_file_name = self.args.file_name if mlmd_file_name == "mlmd": mlmd_file_name = "./mlmd" + current_directory = os.path.dirname(mlmd_file_name) if not os.path.exists(mlmd_file_name): - raise FileNotFound(mlmd_file_name) + raise FileNotFound(mlmd_file_name, current_directory) # creating cmfquery object query = cmfquery.CmfQuery(mlmd_file_name) diff --git a/cmflib/commands/init/amazonS3.py b/cmflib/commands/init/amazonS3.py index 3d8edb3c..bc5f96c4 100644 --- a/cmflib/commands/init/amazonS3.py +++ b/cmflib/commands/init/amazonS3.py @@ -30,7 +30,7 @@ ) from cmflib.utils.cmf_config import CmfConfig from cmflib.utils.helper_functions import is_git_repo -from cmflib.cmf_exception_handling import ArgumentNotProvided, CmfInitComplete, CmfInitFailed +from cmflib.cmf_exception_handling import Neo4jArgumentNotProvided, CmfInitComplete, CmfInitFailed class CmdInitAmazonS3(CmdBase): def run(self): @@ -63,7 +63,7 @@ def run(self): ): pass else: - raise ArgumentNotProvided + raise Neo4jArgumentNotProvided output = is_git_repo() if not output: diff --git a/cmflib/commands/init/local.py b/cmflib/commands/init/local.py index 1086e9ce..6eeb64a7 100644 --- a/cmflib/commands/init/local.py +++ b/cmflib/commands/init/local.py @@ -17,7 +17,7 @@ #!/usr/bin/env python3 import argparse import os -from cmflib.cmf_exception_handling import ArgumentNotProvided, CmfInitComplete, CmfInitFailed +from cmflib.cmf_exception_handling import Neo4jArgumentNotProvided, CmfInitComplete, CmfInitFailed from cmflib.cli.command import CmdBase from cmflib.dvc_wrapper import ( git_quiet_init, @@ -61,7 +61,7 @@ def run(self): ): pass else: - raise ArgumentNotProvided + raise Neo4jArgumentNotProvided output = is_git_repo() diff --git a/cmflib/commands/init/minioS3.py b/cmflib/commands/init/minioS3.py index 4e859458..38f0bfbf 100644 --- a/cmflib/commands/init/minioS3.py +++ b/cmflib/commands/init/minioS3.py @@ -30,7 +30,7 @@ ) from cmflib.utils.cmf_config import CmfConfig from cmflib.utils.helper_functions import is_git_repo -from cmflib.cmf_exception_handling import ArgumentNotProvided, CmfInitComplete, CmfInitFailed +from cmflib.cmf_exception_handling import Neo4jArgumentNotProvided, CmfInitComplete, CmfInitFailed class CmdInitMinioS3(CmdBase): def run(self): @@ -63,7 +63,7 @@ def run(self): ): pass else: - raise ArgumentNotProvided + raise Neo4jArgumentNotProvided output = is_git_repo() if not output: branch_name = "master" diff --git a/cmflib/commands/init/osdfremote.py b/cmflib/commands/init/osdfremote.py index d7cbe607..d4033b9a 100644 --- a/cmflib/commands/init/osdfremote.py +++ b/cmflib/commands/init/osdfremote.py @@ -18,7 +18,7 @@ #!/usr/bin/env python3 import argparse import os -from cmflib.cmf_exception_handling import CmfInitComplete, CmfInitFailed, ArgumentNotProvided +from cmflib.cmf_exception_handling import CmfInitComplete, CmfInitFailed, Neo4jArgumentNotProvided from cmflib.cli.command import CmdBase from cmflib.dvc_wrapper import ( git_quiet_init, @@ -64,7 +64,7 @@ def run(self): ): pass else: - raise ArgumentNotProvided + raise Neo4jArgumentNotProvided output = is_git_repo() if not output: branch_name = "master" diff --git a/cmflib/commands/init/sshremote.py b/cmflib/commands/init/sshremote.py index 73afb28b..ca1636fb 100644 --- a/cmflib/commands/init/sshremote.py +++ b/cmflib/commands/init/sshremote.py @@ -31,7 +31,7 @@ ) from cmflib.utils.cmf_config import CmfConfig from cmflib.utils.helper_functions import is_git_repo -from cmflib.cmf_exception_handling import ArgumentNotProvided, CmfInitComplete, CmfInitFailed +from cmflib.cmf_exception_handling import Neo4jArgumentNotProvided, CmfInitComplete, CmfInitFailed class CmdInitSSHRemote(CmdBase): def run(self): @@ -64,7 +64,7 @@ def run(self): ): pass else: - raise ArgumentNotProvided + raise Neo4jArgumentNotProvided output = is_git_repo() if not output: branch_name = "master" diff --git a/cmflib/commands/metadata/push.py b/cmflib/commands/metadata/push.py index 00e4d250..e427ee2e 100644 --- a/cmflib/commands/metadata/push.py +++ b/cmflib/commands/metadata/push.py @@ -23,7 +23,7 @@ from cmflib.cli.utils import find_root from cmflib.server_interface import server_interface from cmflib.utils.cmf_config import CmfConfig -from cmflib.cmf_exception_handling import MlmdAndTensorboardPushSuccess, MlmdAndTensorboardPushFailure, MlmdFilePushedSuccess, ExecutionsAlreadyExists +from cmflib.cmf_exception_handling import TensorboardPushSuccess, TensorboardPushFailure, MlmdFilePushSuccess, ExecutionsAlreadyExists from cmflib.cmf_exception_handling import FileNotFound, ExecutionIDNotFound, PipelineNotFound, ExecutionsAlreadyExists, UpdateCmfVersion, CmfServerNotAvailable, InternalServerError, CmfNotConfigured, InvalidTensorboardFilePath # This class pushes mlmd file to cmf-server class CmdMetadataPush(CmdBase): @@ -43,14 +43,15 @@ def run(self): url = attr_dict.get("cmf-server-ip", "http://127.0.0.1:80") mlmd_file_name = "./mlmd" - + current_directory = os.getcwd() # checks if mlmd filepath is given if self.args.file_name: mlmd_file_name = self.args.file_name + current_directory = os.path.dirname(self.args.file_name) # checks if mlmd file is present in current directory or given directory if not os.path.exists(mlmd_file_name): - raise FileNotFound(mlmd_file_name) + raise FileNotFound(mlmd_file_name, current_directory) query = cmfquery.CmfQuery(mlmd_file_name) # print(json.dumps(json.loads(json_payload), indent=4, sort_keys=True)) @@ -88,7 +89,7 @@ def run(self): display_output = "" if response.json()['status']=="success": display_output = "mlmd is successfully pushed." - output = MlmdFilePushedSuccess(mlmd_file_name) + output = MlmdFilePushSuccess(mlmd_file_name) if response.json()["status"]=="exists": display_output = "Executions already exists." output = ExecutionsAlreadyExists @@ -109,10 +110,10 @@ def run(self): tstatus_code = tresponse.status_code if tstatus_code == 200: # give status code as success - return MlmdAndTensorboardPushSuccess() + return TensorboardPushSuccess() else: # give status code as failure - return MlmdAndTensorboardPushFailure(file_name,tresponse.text) + return TensorboardPushFailure(file_name,tresponse.text) # If path provided is a directory elif os.path.isdir(self.args.tensorboard): # Recursively push all files and subdirectories @@ -125,8 +126,8 @@ def run(self): print(f"tensorboard logs: File {file_name} uploaded successfully.") else: # give status as failure - return MlmdAndTensorboardPushFailure(file_name,tresponse.text) - return MlmdAndTensorboardPushSuccess() + return TensorboardPushFailure(file_name,tresponse.text) + return TensorboardPushSuccess() else: return InvalidTensorboardFilePath() elif status_code==422 and response.json()["status"]=="version_update": diff --git a/cmflib/storage_backends/minio_artifacts.py b/cmflib/storage_backends/minio_artifacts.py index 251a0d84..505ff333 100644 --- a/cmflib/storage_backends/minio_artifacts.py +++ b/cmflib/storage_backends/minio_artifacts.py @@ -38,7 +38,7 @@ def download_artifacts( ) found = client.bucket_exists(bucket_name) if not found: #check if minio bucket exists - raise BucketNotFound() + raise BucketNotFound(bucket_name) response = "" From a02e5e232b37fface2e7b4ed3dabef9969dc9ae8 Mon Sep 17 00:00:00 2001 From: Varkha Sharma Date: Thu, 12 Dec 2024 22:49:13 -0800 Subject: [PATCH 13/41] made some changes in local_artifacts.py and other files --- cmflib/cmf_exception_handling.py | 8 +- cmflib/commands/artifact/list.py | 2 +- cmflib/commands/artifact/pull.py | 109 +++++++++++++------- cmflib/commands/metadata/push.py | 2 +- cmflib/storage_backends/local_artifacts.py | 111 ++++++++++++--------- 5 files changed, 143 insertions(+), 89 deletions(-) diff --git a/cmflib/cmf_exception_handling.py b/cmflib/cmf_exception_handling.py index 9ca2a9a0..949743f9 100644 --- a/cmflib/cmf_exception_handling.py +++ b/cmflib/cmf_exception_handling.py @@ -1,5 +1,5 @@ ### -# Copyright (2022) Hewlett Packard Enterprise Development LP +# Copyright (2024) Hewlett Packard Enterprise Development LP # # Licensed under the Apache License, Version 2.0 (the "License"); # You may not use this file except in compliance with the License. @@ -90,7 +90,7 @@ def __init__(self, tensorboard_file_name:str = "All", return_code=206): def handle(self): if self.tensorboard_file_name == "All": - return f"tensorboard logs: files pushed successfully" + return f"SUCCESS: All tensorboard logs pushed successfully." return f"tensorboard logs: file {self.tensorboard_file_name} pushed successfully." class CmfInitComplete(CmfSuccess): @@ -130,7 +130,7 @@ def handle(self): class FileNotFound(CmfFailure): - def __init__(self,file_name, directory, return_code=102): + def __init__(self, file_name, directory, return_code=102): self.directory = directory self.file_name =file_name super().__init__(return_code) @@ -184,7 +184,7 @@ def __init__(self,files_downloaded, Files_failed_to_download, return_code=108): super().__init__(return_code) def handle(self): - return f"Number of files downloaded = {self.files_downloaded }. Files failed to download = {self.Files_failed_to_download}." + return f"INFO: Number of files downloaded = {self.files_downloaded }. Files failed to download = {self.Files_failed_to_download}." class Minios3ServerInactive(CmfFailure): def __init__(self,return_code=109): diff --git a/cmflib/commands/artifact/list.py b/cmflib/commands/artifact/list.py index 2cd64f48..4a5aedf0 100644 --- a/cmflib/commands/artifact/list.py +++ b/cmflib/commands/artifact/list.py @@ -223,7 +223,7 @@ def run(self): break return "End of records.." else: - return ArtifactNotFound + return ArtifactNotFound(self.args.artifact_name) df = self.convert_to_datetime(df, "create_time_since_epoch") self.display_table(df) diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index 55613709..63f8ef4c 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -28,7 +28,16 @@ ) from cmflib.cli.command import CmdBase from cmflib.utils.dvc_config import DvcConfig -from cmflib.cmf_exception_handling import PipelineNotFound, FileNotFound, ExecutionsNotFound, ArtifactNotFound, BatchDownloadFailure, BatchDownloadSuccess,ObjectDownloadFailure, ObjectDownloadSuccess +from cmflib.cmf_exception_handling import ( + PipelineNotFound, + FileNotFound, + ExecutionsNotFound, + ArtifactNotFound, + BatchDownloadFailure, + BatchDownloadSuccess, + ObjectDownloadFailure, + ObjectDownloadSuccess +) from cmflib.cli.utils import check_minio_server class CmdArtifactPull(CmdBase): @@ -213,8 +222,6 @@ def run(self): if type(output) is not dict: return output dvc_config_op = output - total_files_count = 0 - files_download_completed = 0 if dvc_config_op["core.remote"] == "minio": minio_class_obj = minio_artifacts.MinioArtifacts() if self.args.artifact_name: #checking if artifact_name is in mlmd @@ -245,6 +252,8 @@ def run(self): status = BatchDownloadFailure(total_files_in_directory, file_failed_to_downloaded) return status else: + files_downloaded = 0 + files_failed_to_download = 0 for name, url in name_url_dict.items(): if not isinstance(url, str): continue @@ -271,7 +280,12 @@ def run(self): status = BatchDownloadFailure(files_downloaded=files_downloaded, Files_failed_to_download= Files_failed_to_download) return status elif dvc_config_op["core.remote"] == "local-storage": - local_class_obj = local_artifacts.LocalArtifacts() + local_class_obj = local_artifacts.LocalArtifacts(dvc_config_op) + # There are two main conditions + # Condition 1 - user can use -a paramter for cmf artifact pull command + # -a can be a dir or a file + # Condition 2 - user can chose to download all the artifacts in one go. + # we can have both dir and files in our list of artifacts if self.args.artifact_name: output = self.search_artifact(name_url_dict) # output[0] = name @@ -281,44 +295,71 @@ def run(self): raise ArtifactNotFound(self.args.artifact_name) else: local_args = self.extract_repo_args("local", output[0], output[1], current_directory) - total_files_in_directory,file_downloaded,return_code = local_class_obj.download_artifacts( - dvc_config_op, current_directory, local_args[0], local_args[1] - ) - file_failed_to_download = total_files_in_directory - file_downloaded - if not local_args[0].endswith(".dir"): - if return_code ==206: - file_downloaded = 1 - else: - file_failed_to_downloaded = 1 + # local_args [0] = current_dvc_loc + # local_args [1] = download_loc - if return_code == 206: - status = BatchDownloadSuccess(file_downloaded) + if not local_args[0].endswith(".dir"): + object_name, download_loc, download_flag = local_class_obj.download_file(current_directory, local_args[0], local_args[1]) + if download_flag: + return ObjectDownloadSuccess(object_name, download_loc) + else: + return ObjectDownloadFailure(object_name) + else: - status = BatchDownloadFailure(total_files_in_directory, file_failed_to_downloaded) - return status + # we are downloading multiple files from a directory + # return total_files_in_directory, files_downloaded + total_files_in_directory, dir_files_downloaded, download_flag = local_class_obj.download_directory(current_directory, local_args[0], local_args[1]) + + if download_flag: + return BatchDownloadSuccess(dir_files_downloaded) + else: + file_failed_to_download = total_files_in_directory - dir_files_downloaded + return BatchDownloadFailure(dir_files_downloaded, file_failed_to_downloaded) else: + files_downloaded = 0 + files_failed_to_download = 0 for name, url in name_url_dict.items(): if not isinstance(url, str): continue + # name1 - file + # name2 - failed file + # name3 - dir (5 files) + # name4 - dir (4 files) - failed dir - 2 files passed, 2 files failed + # name5 - file + # name6 - dir - and can't open it (but it has 2 files) .. user don't know local_args = self.extract_repo_args("local", name, url, current_directory) - if not local_args[1].endswith(".dir"): - total_files_count += 1 - # local_args[0] = current dvc location - # local_args[1] = current download location - total_files_in_dir,count_files_success,return_code = local_class_obj.download_artifacts( - dvc_config_op, current_directory, local_args[0], local_args[1] - ) - total_files_count += total_files_in_dir - files_download_completed += count_files_success - if return_code == 206 and not local_args[1].endswith(".dir") : - files_download_completed += 1 - files_downloaded = files_download_completed + count_files_success - Files_failed_to_download = total_files_in_dir + total_files_count - files_download_completed - count_files_success - if Files_failed_to_download == 0: - status = BatchDownloadSuccess(files_downloaded=files_downloaded) + # local_args [0] = current_dvc_loc + # local_args [1] = download_loc + + if not local_args[0].endswith(".dir"): + print("current dvc loc = ", local_args[0]) + object_name, download_loc, download_flag = local_class_obj.download_file( + current_directory, local_args[0], local_args[1]) + # print output here because we are in a loop and can't return the control + if download_flag: + print(f"object {object_name} downloaded at {download_loc}.") + files_downloaded += 1 + else: + files_failed_to_download += 1 + else: + print("i should come here once") + # we are downloading multiple files from a directory + total_files_in_directory, dir_files_downloaded, download_flag = local_class_obj.download_directory( + current_directory, local_args[0], local_args[1]) + # download_flag is true only when all the files from the directory are successfully downlaoded. + if download_flag: + files_downloaded += dir_files_downloaded + else: + files_downloaded += dir_files_downloaded + files_failed_to_download += (total_files_in_directory - dir_files_downloaded) + + # we are assuming, if files_failed_to_download > 0, it means our download of artifacts is not success + if not files_failed_to_download: + return BatchDownloadSuccess(files_downloaded) else: - status = BatchDownloadFailure(files_downloaded=files_downloaded, Files_failed_to_download= Files_failed_to_download) - return status + return BatchDownloadFailure( + files_downloaded, files_failed_to_download) + elif dvc_config_op["core.remote"] == "ssh-storage": sshremote_class_obj = sshremote_artifacts.SSHremoteArtifacts() if self.args.artifact_name: diff --git a/cmflib/commands/metadata/push.py b/cmflib/commands/metadata/push.py index e427ee2e..0ece69b4 100644 --- a/cmflib/commands/metadata/push.py +++ b/cmflib/commands/metadata/push.py @@ -110,7 +110,7 @@ def run(self): tstatus_code = tresponse.status_code if tstatus_code == 200: # give status code as success - return TensorboardPushSuccess() + return TensorboardPushSuccess(file_name) else: # give status code as failure return TensorboardPushFailure(file_name,tresponse.text) diff --git a/cmflib/storage_backends/local_artifacts.py b/cmflib/storage_backends/local_artifacts.py index f4146361..c29db00d 100644 --- a/cmflib/storage_backends/local_artifacts.py +++ b/cmflib/storage_backends/local_artifacts.py @@ -16,21 +16,48 @@ import os from dvc.api import DVCFileSystem +from cmflib.cmf_exception_handling import ObjectDownloadSuccess -class LocalArtifacts: - def download_artifacts( +class LocalArtifacts(): + # This class downloads one local artifact at a time and if the passed artifact is a directory + # then, it downloads all the files from the directory + + def __init__(self, dvc_config_op): + self.fs = fs = DVCFileSystem( + dvc_config_op["remote.local-storage.url"] + ) # dvc_config_op[1] is file system path - "/path/to/local/repository" + + def download_file( self, - dvc_config_op, current_directory: str, object_name: str, download_loc: str, ): - obj = True + # get_file() only creates file, to put artifacts in proper directory, subfolders are required. + # download_loc = contains absolute path of the file with file name and extension + dir_path = "" + if "/" in download_loc: + dir_path, _ = download_loc.rsplit("/", 1) + if dir_path != "": + os.makedirs(dir_path, mode=0o777, exist_ok=True) # creating subfolders if needed + try: - fs = DVCFileSystem( - dvc_config_op["remote.local-storage.url"] - ) # dvc_config_op[1] is file system path - "/path/to/local/repository" - + response = self.fs.get_file(object_name, download_loc) + + if response == None: # get_file() returns none when file gets downloaded. + return object_name, download_loc, True + else: + return object_name, download_loc, False + except Exception as e: + return object_name, download_loc, False + + + def download_directory( + self, + current_directory: str, + object_name: str, + download_loc: str, + ): # get_file() only creates file, to put artifacts in proper directory, subfolders are required. # download_loc = contains absolute path of the file with file name and extension dir_path = "" @@ -39,31 +66,23 @@ def download_artifacts( if dir_path != "": os.makedirs(dir_path, mode=0o777, exist_ok=True) # creating subfolders if needed - response = "" - """" if object_name ends with .dir - it is a directory. we download .dir object with 'temp_dir' and remove this after all the files from this .dir object is downloaded. """ + # in case of .dir, download_loc is a absolute path for a folder + os.makedirs(download_loc, mode=0o777, exist_ok=True) total_files_in_directory = 0 - file_download_success = 0 - download_success_return_code = 206 - download_failure_return_code = 207 - if object_name.endswith('.dir'): - print("inside") - # in case of .dir, download_loc is a absolute path for a folder - os.makedirs(download_loc, mode=0o777, exist_ok=True) - - # download the .dir object - temp_dir = f"{download_loc}/dir" - try: - response = fs.get_file(object_name, temp_dir) - except Exception as e: - print(f"object {object_name} is not downloaded.") - return total_files_in_directory,file_download_success,download_failure_return_code + files_downloaded = 0 + # download the .dir object + temp_dir = f"{download_loc}/dir" + try: + # we are getting .dir object which contains information about all the files tracked inside it + response = self.fs.get_file(object_name, temp_dir) with open(temp_dir, 'r') as file: tracked_files = eval(file.read()) + # removing temp_dir if os.path.exists(temp_dir): os.remove(temp_dir) @@ -75,8 +94,8 @@ def download_artifacts( which will leave us with the artifact repo path """ repo_path = "/".join(object_name.split("/")[:-2]) - file_download_failure = 0 + obj = True for file_info in tracked_files: total_files_in_directory += 1 relpath = file_info['relpath'] @@ -87,33 +106,27 @@ def download_artifacts( temp_object_name = f"{repo_path}/{formatted_md5}" temp_download_loc = f"{download_loc}/{relpath}" try: - obj = fs.get_file(temp_object_name, temp_download_loc) + obj = self.fs.get_file(temp_object_name, temp_download_loc) if obj == None: - file_download_success += 1 + files_downloaded += 1 print(f"object {temp_object_name} downloaded at {temp_download_loc}.") else: print(f"object {temp_object_name} is not downloaded.") - file_download_failure += 1 + # this exception is for get_file() function for temp_object_name except Exception as e: print(f"object {temp_object_name} is not downloaded.") - file_download_failure += 1 - if file_download_failure == 0: # if count_failed is 0 it means all the objects of directory are downloaded - response = None - else: # if count_failed is greater than 0 it means some artifacts or all are not downloaded - response = False - else: - try: - response = fs.get_file(object_name, download_loc) - except Exception as e: - print(f"object {object_name} is not downloaded.") - return total_files_in_directory,file_download_success,download_failure_return_code - if response == None: # get_file() returns none when file gets downloaded. - print(f"object {object_name} downloaded at {download_loc}.") - return total_files_in_directory,file_download_success, download_success_return_code - if response == False: + + # total_files - files_downloaded gives us the number of files which are failed to download + if (total_files_in_directory - files_downloaded) == 0: + return total_files_in_directory, files_downloaded, True + else: + return total_files_in_directory, files_downloaded, False + # this exception is for get_file() function for object_name + except Exception as e: print(f"object {object_name} is not downloaded.") - return total_files_in_directory,file_download_success,download_failure_return_code - except TypeError as exception: - return exception - except Exception as exception: - return exception + # need to improve this + # We usually don't count .dir as a file while counting total_files_in_directory. + # However, here we failed to download the .dir folder itself. So we need to make + # total_files_in_directory = 1, because .............. + total_files_in_directory = 1 + return total_files_in_directory, files_downloaded, False From 7bc9b73c97b82dc3b97795c6745b6395abcb3aa3 Mon Sep 17 00:00:00 2001 From: Varkha Sharma Date: Thu, 12 Dec 2024 23:49:56 -0800 Subject: [PATCH 14/41] adding minio_artifact.py code --- cmflib/commands/artifact/pull.py | 94 ++++++---- cmflib/storage_backends/local_artifacts.py | 3 +- cmflib/storage_backends/minio_artifacts.py | 201 +++++++++++---------- 3 files changed, 166 insertions(+), 132 deletions(-) diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index 63f8ef4c..3ad54081 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -223,7 +223,7 @@ def run(self): return output dvc_config_op = output if dvc_config_op["core.remote"] == "minio": - minio_class_obj = minio_artifacts.MinioArtifacts() + minio_class_obj = minio_artifacts.MinioArtifacts(dvc_config_op) if self.args.artifact_name: #checking if artifact_name is in mlmd output = self.search_artifact(name_url_dict) # output[0] = name @@ -232,25 +232,32 @@ def run(self): raise ArtifactNotFound(self.args.artifact_name) else: minio_args = self.extract_repo_args("minio", output[0], output[1], current_directory) - total_files_in_directory,file_downloaded,return_code = minio_class_obj.download_artifacts( - dvc_config_op, - current_directory, - minio_args[0], # bucket_name - minio_args[1], # object_name - minio_args[2], # path_name - ) - file_failed_to_download = total_files_in_directory - file_downloaded - if not minio_args[0].endswith(".dir"): - if return_code == 206: - file_downloaded = 1 - else: - file_failed_to_downloaded = 1 - - if return_code == 206: - status = BatchDownloadSuccess(file_downloaded) + if not minio_args[1].endswith(".dir"): + object_name, download_loc, download_flag = minio_class_obj.download_file( + current_directory, + minio_args[0], # bucket_name + minio_args[1], # object_name + minio_args[2], # path_name + ) + if download_flag: + return ObjectDownloadSuccess(object_name, download_loc) + else: + return ObjectDownloadFailure(object_name) else: - status = BatchDownloadFailure(total_files_in_directory, file_failed_to_downloaded) - return status + # we are downloading multiple files from a directory + # return total_files_in_directory, files_downloaded + total_files_in_directory, dir_files_downloaded, download_flag = minio_class_obj.download_directory( + current_directory, + minio_args[0], # bucket_name + minio_args[1], # object_name + minio_args[2], # path_name + ) + + if download_flag: + return BatchDownloadSuccess(dir_files_downloaded) + else: + file_failed_to_download = total_files_in_directory - dir_files_downloaded + return BatchDownloadFailure(dir_files_downloaded, file_failed_to_downloaded) else: files_downloaded = 0 files_failed_to_download = 0 @@ -259,26 +266,37 @@ def run(self): continue minio_args = self.extract_repo_args("minio", name, url, current_directory) if not minio_args[1].endswith(".dir"): - total_files_count += 1 - total_files_in_dir,count_files_success,return_code = minio_class_obj.download_artifacts( - dvc_config_op, - current_directory, - minio_args[0], # bucket_name - minio_args[1], # object_name - minio_args[2], # path_name - ) - total_files_count += total_files_in_dir - files_download_completed += count_files_success - #print(total_files_in_dir,count_files_success,"total_files_in_dir,count_files_success") - if return_code == 206 and not minio_args[1].endswith(".dir") : - files_download_completed += 1 - files_downloaded = files_download_completed + count_files_success - Files_failed_to_download = total_files_in_dir + total_files_count - files_download_completed - count_files_success - if Files_failed_to_download == 0: - status = BatchDownloadSuccess(files_downloaded=files_downloaded) + object_name, download_loc, download_flag = minio_class_obj.download_file( + current_directory, + minio_args[0], # bucket_name + minio_args[1], # object_name + minio_args[2], # path_name + ) + # print output here because we are in a loop and can't return the control + if download_flag: + print(f"object {object_name} downloaded at {download_loc}.") + files_downloaded += 1 + else: + files_failed_to_download += 1 + else: + total_files_in_directory, dir_files_downloaded, download_flag = minio_class_obj.download_directory( + current_directory, + minio_args[0], # bucket_name + minio_args[1], # object_name + minio_args[2], # path_name + ) + # download_flag is true only when all the files from the directory are successfully downlaoded. + if download_flag: + files_downloaded += dir_files_downloaded + else: + files_downloaded += dir_files_downloaded + files_failed_to_download += (total_files_in_directory - dir_files_downloaded) + + # we are assuming, if files_failed_to_download > 0, it means our download of artifacts is not success + if not files_failed_to_download: + return BatchDownloadSuccess(files_downloaded) else: - status = BatchDownloadFailure(files_downloaded=files_downloaded, Files_failed_to_download= Files_failed_to_download) - return status + return BatchDownloadFailure(files_downloaded, files_failed_to_download) elif dvc_config_op["core.remote"] == "local-storage": local_class_obj = local_artifacts.LocalArtifacts(dvc_config_op) # There are two main conditions diff --git a/cmflib/storage_backends/local_artifacts.py b/cmflib/storage_backends/local_artifacts.py index c29db00d..8570c1ac 100644 --- a/cmflib/storage_backends/local_artifacts.py +++ b/cmflib/storage_backends/local_artifacts.py @@ -43,7 +43,6 @@ def download_file( try: response = self.fs.get_file(object_name, download_loc) - if response == None: # get_file() returns none when file gets downloaded. return object_name, download_loc, True else: @@ -116,7 +115,7 @@ def download_directory( except Exception as e: print(f"object {temp_object_name} is not downloaded.") - # total_files - files_downloaded gives us the number of files which are failed to download + # total_files - files_downloaded gives us the number of files which are failed to download if (total_files_in_directory - files_downloaded) == 0: return total_files_in_directory, files_downloaded, True else: diff --git a/cmflib/storage_backends/minio_artifacts.py b/cmflib/storage_backends/minio_artifacts.py index 505ff333..af27e6b1 100644 --- a/cmflib/storage_backends/minio_artifacts.py +++ b/cmflib/storage_backends/minio_artifacts.py @@ -20,110 +20,127 @@ from cmflib.cmf_exception_handling import BucketNotFound class MinioArtifacts: - def download_artifacts( - self, - dvc_config_op, + + def __init__(self, dvc_config_op): + # dvc_config_op["remote.minio.endpointurl"] = http://XX.XX.XX.XX:9000 + self.endpoint = dvc_config_op["remote.minio.endpointurl"].split("http://")[1] + self.access_key = dvc_config_op["remote.minio.access_key_id"] + self.secret_key = dvc_config_op["remote.minio.secret_access_key"] + self.client = Minio( + self.endpoint, access_key=self.access_key, secret_key=self.secret_key, secure=False + ) + + + def download_file( + self, current_directory: str, bucket_name: str, object_name: str, download_loc: str, - ): - # dvc_config_op["remote.minio.endpointurl"] = http://XX.XX.XX.XX:9000 - endpoint = dvc_config_op["remote.minio.endpointurl"].split("http://")[1] - access_key = dvc_config_op["remote.minio.access_key_id"] - secret_key = dvc_config_op["remote.minio.secret_access_key"] + ): try: - client = Minio( - endpoint, access_key=access_key, secret_key=secret_key, secure=False - ) - found = client.bucket_exists(bucket_name) + found = self.client.bucket_exists(bucket_name) if not found: #check if minio bucket exists raise BucketNotFound(bucket_name) + response = self.client.fget_object(bucket_name, object_name, download_loc) + if response: + return object_name, download_loc, True + else: + return object_name, download_loc, False + except Exception as e: + return object_name, download_loc, False - response = "" - """" - if object_name ends with .dir - it is a directory. - we download .dir object with 'temp_dir' and remove - this after all the files from this .dir object is downloaded. - """ - #print("inside download arti") - total_files_in_directory = 0 - file_download_success = 0 - download_success_return_code = 206 - download_failure_return_code = 207 - if object_name.endswith('.dir'): - print("inside if loop") - # in case of .dir, download_loc is a absolute path for a folder - os.makedirs(download_loc, mode=0o777, exist_ok=True) + def download_directory( + self, + current_directory: str, + bucket_name: str, + object_name: str, + download_loc: str, + ): + found = self.client.bucket_exists(bucket_name) + if not found: #check if minio bucket exists + raise BucketNotFound(bucket_name) - # download .dir object - temp_dir = f"{download_loc}/temp_dir" - try: - response = client.fget_object(bucket_name, object_name, temp_dir) - except Exception as e: - print(f"object {object_name} is not downloaded.") - return total_files_in_directory,file_download_success,download_failure_return_code + """" + if object_name ends with .dir - it is a directory. + we download .dir object with 'temp_dir' and remove + this after all the files from this .dir object is downloaded. + """ + + # in case of .dir, download_loc is a absolute path for a folder + os.makedirs(download_loc, mode=0o777, exist_ok=True) + + total_files_in_directory = 0 + files_downloaded = 0 + # download .dir object + temp_dir = f"{download_loc}/temp_dir" + try: + response = self.client.fget_object(bucket_name, object_name, temp_dir) + # except Exception as e: + # print(f"object {object_name} is not downloaded.") + # return total_files_in_directory,file_download_success,download_failure_return_code - with open(temp_dir, 'r') as file: - tracked_files = eval(file.read()) + with open(temp_dir, 'r') as file: + tracked_files = eval(file.read()) - # removing temp_dir - if os.path.exists(temp_dir): - os.remove(temp_dir) + # removing temp_dir + if os.path.exists(temp_dir): + os.remove(temp_dir) - """ - object_name = files/md5/c9/d8fdacc0d942cf8d7d95b6301cfb97.dir - contains the path of the .dir on the artifact repo - we need to remove the hash of the .dir from the object_name - which will leave us with the artifact repo path - """ - repo_path = object_name.split("/") - repo_path = repo_path[:len(repo_path)-2] - repo_path = "/".join(repo_path) - file_download_failure = 0 - - - for file_info in tracked_files: - total_files_in_directory += 1 - relpath = file_info['relpath'] - md5_val = file_info['md5'] - # download_loc = /home/sharvark/datatslice/example-get-started/test/artifacts/raw_data - # md5_val = a237457aa730c396e5acdbc5a64c8453 - # we need a2/37457aa730c396e5acdbc5a64c8453 - formatted_md5 = md5_val[:2] + '/' + md5_val[2:] - temp_download_loc = f"{download_loc}/{relpath}" - temp_object_name = f"{repo_path}/{formatted_md5}" - try: - obj = client.fget_object(bucket_name, temp_object_name, temp_download_loc) - if obj: - print(f"object {temp_object_name} downloaded at {temp_download_loc}.") - file_download_success += 1 - else: - file_download_failure += 1 - print(f"object {temp_object_name} is not downloaded.") - except Exception as e: - print(f"object {temp_object_name} is not downloaded.") - file_download_failure += 1 - if file_download_failure == 0: # if count_failed is 0 it means all the objects of directory are downloaded - response = True - else: # if count_failed is greater than 0 it means some artifacts or all are not downloaded - response = False - else: - #print("inside else loop") + """ + object_name = files/md5/c9/d8fdacc0d942cf8d7d95b6301cfb97.dir + contains the path of the .dir on the artifact repo + we need to remove the hash of the .dir from the object_name + which will leave us with the artifact repo path + """ + repo_path = object_name.split("/") + repo_path = repo_path[:len(repo_path)-2] + repo_path = "/".join(repo_path) + + + obj=True + for file_info in tracked_files: + total_files_in_directory += 1 + relpath = file_info['relpath'] + md5_val = file_info['md5'] + # download_loc = /home/sharvark/datatslice/example-get-started/test/artifacts/raw_data + # md5_val = a237457aa730c396e5acdbc5a64c8453 + # we need a2/37457aa730c396e5acdbc5a64c8453 + formatted_md5 = md5_val[:2] + '/' + md5_val[2:] + temp_download_loc = f"{download_loc}/{relpath}" + temp_object_name = f"{repo_path}/{formatted_md5}" try: - response = client.fget_object(bucket_name, object_name, download_loc) + obj = self.client.fget_object(bucket_name, temp_object_name, temp_download_loc) + if obj: + files_downloaded +=1 + print(f"object {temp_object_name} downloaded at {temp_download_loc}.") + else: + print(f"object {temp_object_name} is not downloaded.") except Exception as e: - print(f"object {object_name} is not downloaded.") - return total_files_in_directory,file_download_success,download_failure_return_code - if response: - print(f"object {object_name} downloaded at {download_loc}.") - return total_files_in_directory,file_download_success, download_success_return_code - else: - print(f"object {object_name} is not downloaded.") - return total_files_in_directory,file_download_success,download_failure_return_code - except TypeError as exception: - #print("inside ") - return exception - except S3Error as exception: - return exception + print(f"object {temp_object_name} is not downloaded.") + # total_files - files_downloaded gives us the number of files which are failed to download + if (total_files_in_directory - files_downloaded) == 0: + return total_files_in_directory, files_downloaded, True + else: + return total_files_in_directory, files_downloaded, False + except Exception as e: + print(f"object {object_name} is not downloaded.") + # need to improve this + # We usually don't count .dir as a file while counting total_files_in_directory. + # However, here we failed to download the .dir folder itself. So we need to make + # total_files_in_directory = 1, because .............. + total_files_in_directory = 1 + return total_files_in_directory, files_downloaded, False + + + + + + + + # except TypeError as exception: + # #print("inside ") + # return exception + # except S3Error as exception: + # return exception From f9112700ad94b00f9acb32c5491711c2bb7f6b18 Mon Sep 17 00:00:00 2001 From: abhinav chobey Date: Sun, 15 Dec 2024 21:25:19 -0800 Subject: [PATCH 15/41] changing way of writing amazons3_artifacts.py --- cmflib/commands/artifact/pull.py | 102 +++++----- cmflib/storage_backends/amazonS3_artifacts.py | 178 +++++++++--------- 2 files changed, 151 insertions(+), 129 deletions(-) diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index 3ad54081..49fe5c8b 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -257,7 +257,7 @@ def run(self): return BatchDownloadSuccess(dir_files_downloaded) else: file_failed_to_download = total_files_in_directory - dir_files_downloaded - return BatchDownloadFailure(dir_files_downloaded, file_failed_to_downloaded) + return BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) else: files_downloaded = 0 files_failed_to_download = 0 @@ -277,6 +277,7 @@ def run(self): print(f"object {object_name} downloaded at {download_loc}.") files_downloaded += 1 else: + print(f"object {object_name} is not downloaded ") files_failed_to_download += 1 else: total_files_in_directory, dir_files_downloaded, download_flag = minio_class_obj.download_directory( @@ -332,7 +333,7 @@ def run(self): return BatchDownloadSuccess(dir_files_downloaded) else: file_failed_to_download = total_files_in_directory - dir_files_downloaded - return BatchDownloadFailure(dir_files_downloaded, file_failed_to_downloaded) + return BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) else: files_downloaded = 0 files_failed_to_download = 0 @@ -350,7 +351,6 @@ def run(self): # local_args [1] = download_loc if not local_args[0].endswith(".dir"): - print("current dvc loc = ", local_args[0]) object_name, download_loc, download_flag = local_class_obj.download_file( current_directory, local_args[0], local_args[1]) # print output here because we are in a loop and can't return the control @@ -358,9 +358,9 @@ def run(self): print(f"object {object_name} downloaded at {download_loc}.") files_downloaded += 1 else: + print(f"object {object_name} is not downloaded ") files_failed_to_download += 1 else: - print("i should come here once") # we are downloading multiple files from a directory total_files_in_directory, dir_files_downloaded, download_flag = local_class_obj.download_directory( current_directory, local_args[0], local_args[1]) @@ -400,12 +400,12 @@ def run(self): if return_code == 206: file_downloaded = 1 else: - file_failed_to_downloaded = 1 + file_failed_to_download = 1 if return_code == 206: status = BatchDownloadSuccess(file_downloaded) else: - status = BatchDownloadFailure(total_files_in_directory, file_failed_to_downloaded) + status = BatchDownloadFailure(total_files_in_directory, file_failed_to_download) return status else: for name, url in name_url_dict.items(): @@ -500,7 +500,7 @@ def run(self): status = BatchDownloadFailure(files_downloaded=files_downloaded, Files_failed_to_download= Files_failed_to_download) return status elif dvc_config_op["core.remote"] == "amazons3": - amazonS3_class_obj = amazonS3_artifacts.AmazonS3Artifacts() + amazonS3_class_obj = amazonS3_artifacts.AmazonS3Artifacts(dvc_config_op) if self.args.artifact_name: output = self.search_artifact(name_url_dict) # output[0] = name @@ -510,51 +510,70 @@ def run(self): else: args = self.extract_repo_args("amazons3", output[0], output[1], current_directory) if args[0] and args[1] and args[2]: - total_files_in_directory,file_downloaded,return_code = amazonS3_class_obj.download_artifacts( - dvc_config_op, - current_directory, - args[0], # bucket_name - args[1], # object_name - args[2], # download_loc - ) - file_failed_to_download = total_files_in_directory - file_downloaded - if not args[0].endswith(".dir"): - if return_code ==206: - file_downloaded = 1 - else: - file_failed_to_download = 1 + if not args[1].endswith(".dir"): + object_name, download_loc, download_flag = amazonS3_class_obj.download_file( + current_directory, + args[0], # bucket_name + args[1], # object_name + args[2], # download_loc + ) + if download_flag: + return ObjectDownloadSuccess(object_name, download_loc) + else: + return ObjectDownloadFailure(object_name) + else: + total_files_in_directory, dir_files_downloaded, download_flag = amazonS3_class_obj.download_directory(current_directory, + args[0], # bucket_name + args[1], # object_name + args[2], # download_loc + ) + if download_flag: + return BatchDownloadSuccess(dir_files_downloaded) + else: + file_failed_to_download = total_files_in_directory - dir_files_downloaded + return BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) - if return_code == 206: - status = BatchDownloadSuccess(file_downloaded) - else: - status = BatchDownloadFailure(total_files_in_directory,file_failed_to_download) - return status + else: + files_downloaded = 0 + files_failed_to_download = 0 for name, url in name_url_dict.items(): if not isinstance(url, str): continue args = self.extract_repo_args("amazons3", name, url, current_directory) - if not args[1].endswith(".dir"): - total_files_count += 1 if args[0] and args[1] and args[2]: - total_files_in_dir,count_files_success,return_code = amazonS3_class_obj.download_artifacts( - dvc_config_op, + if not args[1].endswith(".dir"): + object_name, download_loc, download_flag = amazonS3_class_obj.download_file( + current_directory, + args[0], # bucket_name + args[1], # object_name + args[2], # download_loc + ) + if download_flag: + print(f"object {object_name} downloaded at {download_loc}.") + files_downloaded += 1 + else: + print(f"object {object_name} is not downloaded ") + files_failed_to_download += 1 + else: + total_files_in_directory, dir_files_downloaded, download_flag = amazonS3_class_obj.download_directory( current_directory, args[0], # bucket_name args[1], # object_name - args[2], # download_loc + args[2], # path_name ) - total_files_count += total_files_in_dir - files_download_completed += count_files_success - if return_code == 206 and not args[1].endswith(".dir") : - files_download_completed += 1 - files_downloaded = files_download_completed + count_files_success - Files_failed_to_download = total_files_in_dir + total_files_count - files_download_completed - count_files_success - if Files_failed_to_download == 0: - status = BatchDownloadSuccess(files_downloaded=files_downloaded) + # download_flag is true only when all the files from the directory are successfully downlaoded. + if download_flag: + files_downloaded += dir_files_downloaded + else: + files_downloaded += dir_files_downloaded + files_failed_to_download += (total_files_in_directory - dir_files_downloaded) + + # we are assuming, if files_failed_to_download > 0, it means our download of artifacts is not success + if not files_failed_to_download: + return BatchDownloadSuccess(files_downloaded) else: - status = BatchDownloadFailure(files_downloaded=files_downloaded, Files_failed_to_download= Files_failed_to_download) - return status + return BatchDownloadFailure(files_downloaded, files_failed_to_download) else: remote = dvc_config_op["core.remote"] msg = f"{remote} is not valid artifact repository for CMF.\n Reinitialize CMF." @@ -590,5 +609,4 @@ def add_parser(subparsers, parent_parser): "-a", "--artifact_name", help="Specify artifact name.", metavar="" ) - parser.set_defaults(func=CmdArtifactPull) - + parser.set_defaults(func=CmdArtifactPull) \ No newline at end of file diff --git a/cmflib/storage_backends/amazonS3_artifacts.py b/cmflib/storage_backends/amazonS3_artifacts.py index 3fe912d3..315f9bc4 100644 --- a/cmflib/storage_backends/amazonS3_artifacts.py +++ b/cmflib/storage_backends/amazonS3_artifacts.py @@ -18,110 +18,114 @@ import boto3 class AmazonS3Artifacts: - def download_artifacts( + + def __init__(self, dvc_config_op): + self.access_key = dvc_config_op["remote.amazons3.access_key_id"] + self.secret_key = dvc_config_op["remote.amazons3.secret_access_key"] + self.session_token = dvc_config_op["remote.amazons3.session_token"] + self.s3 = boto3.client( + 's3', + aws_access_key_id = self.access_key, + aws_secret_access_key = self.secret_key, + aws_session_token = self.session_token + ) + + def download_file( self, - dvc_config_op, current_directory: str, bucket_name: str, object_name: str, download_loc: str, ): - access_key = dvc_config_op["remote.amazons3.access_key_id"] - secret_key = dvc_config_op["remote.amazons3.secret_access_key"] - session_token = dvc_config_op["remote.amazons3.session_token"] try: - s3 = boto3.client( - 's3', - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, - aws_session_token=session_token - ) - s3.head_bucket(Bucket=bucket_name) - + response = "" + self.s3.head_bucket(Bucket=bucket_name) + dir_path = "" if "/" in download_loc: dir_path, _ = download_loc.rsplit("/", 1) if dir_path != "": os.makedirs(dir_path, mode=0o777, exist_ok=True) # creating subfolders if needed + response = self.s3.download_file(bucket_name, object_name, download_loc) + if response == None: + return object_name, download_loc, True + else: + return object_name, download_loc, False + except Exception as e: + return object_name, download_loc, False - response = "" - total_files_in_directory = 0 - file_download_success = 0 - download_success_return_code = 206 - download_failure_return_code = 207 - - """" - if object_name ends with .dir - it is a directory. - we download .dir object with 'temp_dir' and remove - this after all the files from this .dir object is downloaded. + def download_directory(self, + current_directory: str, + bucket_name: str, + object_name: str, + download_loc: str, + ): + #response = "" + self.s3.head_bucket(Bucket=bucket_name) + """" + if object_name ends with .dir - it is a directory. + we download .dir object with 'temp_dir' and remove + this after all the files from this .dir object is downloaded. + """ + # in case of .dir, download_loc is a absolute path for a folder + dir_path = "" + if "/" in download_loc: + dir_path, _ = download_loc.rsplit("/", 1) + if dir_path != "": + os.makedirs(dir_path, mode=0o777, exist_ok=True) # creating subfolders if needed + os.makedirs(download_loc, mode=0o777, exist_ok=True) + total_files_in_directory = 0 + files_downloaded = 0 + # download .dir object + temp_dir = f"{download_loc}/temp_dir" + try: + response = self.s3.download_file(bucket_name, object_name, temp_dir) + with open(temp_dir, 'r') as file: + tracked_files = eval(file.read()) + # removing temp_dir + if os.path.exists(temp_dir): + os.remove(temp_dir) """ - if object_name.endswith('.dir'): - # in case of .dir, download_loc is a absolute path for a folder - os.makedirs(download_loc, mode=0o777, exist_ok=True) - - # download .dir object - temp_dir = f"{download_loc}/temp_dir" + object_name = files/md5/c9/d8fdacc0d942cf8d7d95b6301cfb97.dir + contains the path of the .dir on the artifact repo + we need to remove the hash of the .dir from the object_name + which will leave us with the artifact repo path + """ + repo_path = "/".join(object_name.split("/")[:-2]) + obj=True + for file_info in tracked_files: + total_files_in_directory += 1 + relpath = file_info['relpath'] + md5_val = file_info['md5'] + # download_loc = /home/user/datatslice/example-get-started/test/artifacts/raw_data + # md5_val = a237457aa730c396e5acdbc5a64c8453 + # we need a2/37457aa730c396e5acdbc5a64c8453 + formatted_md5 = md5_val[:2] + '/' + md5_val[2:] + temp_download_loc = f"{download_loc}/{relpath}" + temp_object_name = f"{repo_path}/{formatted_md5}" try: - response = s3.download_file(bucket_name, object_name, temp_dir) - except Exception as e: - print(f"object {object_name} is not downloaded.") - return total_files_in_directory,file_download_success,download_failure_return_code - - with open(temp_dir, 'r') as file: - tracked_files = eval(file.read()) - - # removing temp_dir - if os.path.exists(temp_dir): - os.remove(temp_dir) - - """ - object_name = files/md5/c9/d8fdacc0d942cf8d7d95b6301cfb97.dir - contains the path of the .dir on the artifact repo - we need to remove the hash of the .dir from the object_name - which will leave us with the artifact repo path - """ - file_download_failure = 0 - repo_path = "/".join(object_name.split("/")[:-2]) - for file_info in tracked_files: - total_files_in_directory += 1 - relpath = file_info['relpath'] - md5_val = file_info['md5'] - # download_loc = /home/user/datatslice/example-get-started/test/artifacts/raw_data - # md5_val = a237457aa730c396e5acdbc5a64c8453 - # we need a2/37457aa730c396e5acdbc5a64c8453 - formatted_md5 = md5_val[:2] + '/' + md5_val[2:] - temp_download_loc = f"{download_loc}/{relpath}" - temp_object_name = f"{repo_path}/{formatted_md5}" - try: - obj = s3.download_file(bucket_name, temp_object_name, temp_download_loc) - if obj == None: - file_download_success += 1 - print(f"object {temp_object_name} downloaded at {temp_download_loc}.") - else: - print(f"object {temp_object_name} is not downloaded.") - file_download_failure += 1 - except Exception as e: + obj = self.s3.download_file(bucket_name, temp_object_name, temp_download_loc) + if obj == None: + files_downloaded += 1 + print(f"object {temp_object_name} downloaded at {temp_download_loc}.") + else: print(f"object {temp_object_name} is not downloaded.") - file_download_failure += 1 - if file_download_failure == 0: # if count_failed is 0 it means all the objects of directory are downloaded - response = None - else: # if count_failed is greater than 0 it means some artifacts or all are not downloaded - response = False - else: - # download objects which are file - try: - response = s3.download_file(bucket_name, object_name, download_loc) except Exception as e: - print(f"object {object_name} is not downloaded.") - return total_files_in_directory,file_download_success,download_failure_return_code - if response == None: - print(f"object {object_name} downloaded at {download_loc}.") - return total_files_in_directory,file_download_success, download_success_return_code - if response == False: - print(f"object {object_name} is not downloaded.") - return total_files_in_directory,file_download_success,download_failure_return_code + print(f"object {temp_object_name} is not downloaded.") + if (total_files_in_directory - files_downloaded) == 0: + return total_files_in_directory, files_downloaded, True + else: + return total_files_in_directory, files_downloaded, False + except Exception as e: + print(f"object {object_name} is not downloaded.") + # need to improve this + # We usually don't count .dir as a file while counting total_files_in_directory. + # However, here we failed to download the .dir folder itself. So we need to make + # total_files_in_directory = 1, because .............. + total_files_in_directory = 1 + return total_files_in_directory, files_downloaded, False - except s3.exceptions.ClientError as e: + except self.s3.exceptions.ClientError as e: # If a specific error code is returned, the bucket does not exist if e.response['Error']['Code'] == '404': return f"{bucket_name} doesn't exists!!" @@ -131,4 +135,4 @@ def download_artifacts( except TypeError as exception: return exception except Exception as e: - return e + return e \ No newline at end of file From 11430acd96670a29739660e35c60b0372564c746 Mon Sep 17 00:00:00 2001 From: Varkha Sharma Date: Sun, 15 Dec 2024 21:43:52 -0800 Subject: [PATCH 16/41] adding ssh remote changes --- cmflib/commands/artifact/pull.py | 6 +- cmflib/storage_backends/local_artifacts.py | 134 +++++----- .../storage_backends/sshremote_artifacts.py | 236 ++++++++++-------- 3 files changed, 208 insertions(+), 168 deletions(-) diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index 49fe5c8b..ac3c0ca0 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -298,6 +298,7 @@ def run(self): return BatchDownloadSuccess(files_downloaded) else: return BatchDownloadFailure(files_downloaded, files_failed_to_download) + elif dvc_config_op["core.remote"] == "local-storage": local_class_obj = local_artifacts.LocalArtifacts(dvc_config_op) # There are two main conditions @@ -433,6 +434,7 @@ def run(self): else: status = BatchDownloadFailure(files_downloaded=files_downloaded, Files_failed_to_download= Files_failed_to_download) return status + elif dvc_config_op["core.remote"] == "osdf": #Regenerate Token for OSDF from cmflib.utils.helper_functions import generate_osdf_token @@ -499,6 +501,7 @@ def run(self): else: status = BatchDownloadFailure(files_downloaded=files_downloaded, Files_failed_to_download= Files_failed_to_download) return status + elif dvc_config_op["core.remote"] == "amazons3": amazonS3_class_obj = amazonS3_artifacts.AmazonS3Artifacts(dvc_config_op) if self.args.artifact_name: @@ -609,4 +612,5 @@ def add_parser(subparsers, parent_parser): "-a", "--artifact_name", help="Specify artifact name.", metavar="" ) - parser.set_defaults(func=CmdArtifactPull) \ No newline at end of file + parser.set_defaults(func=CmdArtifactPull) + diff --git a/cmflib/storage_backends/local_artifacts.py b/cmflib/storage_backends/local_artifacts.py index 8570c1ac..4ab19c15 100644 --- a/cmflib/storage_backends/local_artifacts.py +++ b/cmflib/storage_backends/local_artifacts.py @@ -56,76 +56,76 @@ def download_directory( current_directory: str, object_name: str, download_loc: str, - ): - # get_file() only creates file, to put artifacts in proper directory, subfolders are required. - # download_loc = contains absolute path of the file with file name and extension - dir_path = "" - if "/" in download_loc: - dir_path, _ = download_loc.rsplit("/", 1) - if dir_path != "": - os.makedirs(dir_path, mode=0o777, exist_ok=True) # creating subfolders if needed + ): + # get_file() only creates file, to put artifacts in proper directory, subfolders are required. + # download_loc = contains absolute path of the file with file name and extension + dir_path = "" + if "/" in download_loc: + dir_path, _ = download_loc.rsplit("/", 1) + if dir_path != "": + os.makedirs(dir_path, mode=0o777, exist_ok=True) # creating subfolders if needed - """" - if object_name ends with .dir - it is a directory. - we download .dir object with 'temp_dir' and remove - this after all the files from this .dir object is downloaded. - """ - # in case of .dir, download_loc is a absolute path for a folder - os.makedirs(download_loc, mode=0o777, exist_ok=True) - total_files_in_directory = 0 - files_downloaded = 0 - # download the .dir object - temp_dir = f"{download_loc}/dir" - try: - # we are getting .dir object which contains information about all the files tracked inside it - response = self.fs.get_file(object_name, temp_dir) - with open(temp_dir, 'r') as file: - tracked_files = eval(file.read()) + """" + if object_name ends with .dir - it is a directory. + we download .dir object with 'temp_dir' and remove + this after all the files from this .dir object is downloaded. + """ + # in case of .dir, download_loc is a absolute path for a folder + os.makedirs(download_loc, mode=0o777, exist_ok=True) + total_files_in_directory = 0 + files_downloaded = 0 + # download the .dir object + temp_dir = f"{download_loc}/dir" + try: + # we are getting .dir object which contains information about all the files tracked inside it + response = self.fs.get_file(object_name, temp_dir) + with open(temp_dir, 'r') as file: + tracked_files = eval(file.read()) - # removing temp_dir - if os.path.exists(temp_dir): - os.remove(temp_dir) + # removing temp_dir + if os.path.exists(temp_dir): + os.remove(temp_dir) - """ - object_name = "files/md5/9b/9a458ac0b534f088a47c2b68bae479.dir" - contains the path of the .dir on the artifact repo - we need to remove the hash of the .dir from the object_name - which will leave us with the artifact repo path - """ - repo_path = "/".join(object_name.split("/")[:-2]) + """ + object_name = "files/md5/9b/9a458ac0b534f088a47c2b68bae479.dir" + contains the path of the .dir on the artifact repo + we need to remove the hash of the .dir from the object_name + which will leave us with the artifact repo path + """ + repo_path = "/".join(object_name.split("/")[:-2]) - obj = True - for file_info in tracked_files: - total_files_in_directory += 1 - relpath = file_info['relpath'] - md5_val = file_info['md5'] - # md5_val = a237457aa730c396e5acdbc5a64c8453 - # we need a2/37457aa730c396e5acdbc5a64c8453 - formatted_md5 = md5_val[:2] + '/' + md5_val[2:] - temp_object_name = f"{repo_path}/{formatted_md5}" - temp_download_loc = f"{download_loc}/{relpath}" - try: - obj = self.fs.get_file(temp_object_name, temp_download_loc) - if obj == None: - files_downloaded += 1 - print(f"object {temp_object_name} downloaded at {temp_download_loc}.") - else: - print(f"object {temp_object_name} is not downloaded.") - # this exception is for get_file() function for temp_object_name - except Exception as e: + obj = True + for file_info in tracked_files: + total_files_in_directory += 1 + relpath = file_info['relpath'] + md5_val = file_info['md5'] + # md5_val = a237457aa730c396e5acdbc5a64c8453 + # we need a2/37457aa730c396e5acdbc5a64c8453 + formatted_md5 = md5_val[:2] + '/' + md5_val[2:] + temp_object_name = f"{repo_path}/{formatted_md5}" + temp_download_loc = f"{download_loc}/{relpath}" + try: + obj = self.fs.get_file(temp_object_name, temp_download_loc) + if obj == None: + files_downloaded += 1 + print(f"object {temp_object_name} downloaded at {temp_download_loc}.") + else: print(f"object {temp_object_name} is not downloaded.") + # this exception is for get_file() function for temp_object_name + except Exception as e: + print(f"object {temp_object_name} is not downloaded.") - # total_files - files_downloaded gives us the number of files which are failed to download - if (total_files_in_directory - files_downloaded) == 0: - return total_files_in_directory, files_downloaded, True - else: - return total_files_in_directory, files_downloaded, False - # this exception is for get_file() function for object_name - except Exception as e: - print(f"object {object_name} is not downloaded.") - # need to improve this - # We usually don't count .dir as a file while counting total_files_in_directory. - # However, here we failed to download the .dir folder itself. So we need to make - # total_files_in_directory = 1, because .............. - total_files_in_directory = 1 - return total_files_in_directory, files_downloaded, False + # total_files - files_downloaded gives us the number of files which are failed to download + if (total_files_in_directory - files_downloaded) == 0: + return total_files_in_directory, files_downloaded, True + else: + return total_files_in_directory, files_downloaded, False + # this exception is for get_file() function for object_name + except Exception as e: + print(f"object {object_name} is not downloaded.") + # need to improve this + # We usually don't count .dir as a file while counting total_files_in_directory. + # However, here we failed to download the .dir folder itself. So we need to make + # total_files_in_directory = 1, because .............. + total_files_in_directory = 1 + return total_files_in_directory, files_downloaded, False diff --git a/cmflib/storage_backends/sshremote_artifacts.py b/cmflib/storage_backends/sshremote_artifacts.py index 3b02911b..01d81454 100644 --- a/cmflib/storage_backends/sshremote_artifacts.py +++ b/cmflib/storage_backends/sshremote_artifacts.py @@ -18,122 +18,158 @@ import paramiko # this is temporary - need to remove after TripleDES warning goes away from paramiko -import warnings -warnings.filterwarnings(action='ignore', module='.*paramiko.*') +# import warnings +# warnings.filterwarnings(action='ignore', module='.*paramiko.*') class SSHremoteArtifacts: - def download_artifacts( + + def __init__(self, dvc_config_op): + self.user = dvc_config_op["remote.ssh-storage.user"] + self.password = dvc_config_op["remote.ssh-storage.password"] + + + def download_file( self, - dvc_config_op, host: str, current_directory: str, object_name: str, download_loc: str, ): - user = dvc_config_op["remote.ssh-storage.user"] - password = dvc_config_op["remote.ssh-storage.password"] + ssh = paramiko.SSHClient() + ssh.set_missing_host_key_policy( + paramiko.AutoAddPolicy() + ) # this can lead to man in the middle attack, need to find another solution + ssh.connect(host, username=self.user, password=self.password) + sftp = ssh.open_sftp() + dir_path = "" + # in case download_loc is absolute path like /home/user/test/data.xml.gz + # we need to make this absolute path a relative one by removing first '/' + if os.path.isabs(download_loc): + download_loc = download_loc[1:] + if "/" in download_loc: + dir_path, _ = download_loc.rsplit("/", 1) + if dir_path != "": + # creates subfolders needed as per artifacts' folder structure + os.makedirs(dir_path, mode=0o777, exist_ok=True) + + response = "" + abs_download_loc = os.path.abspath(os.path.join(current_directory, download_loc)) try: - ssh = paramiko.SSHClient() - ssh.set_missing_host_key_policy( - paramiko.AutoAddPolicy() - ) # this can lead to man in the middle attack, need to find another solution - ssh.connect(host, username=user, password=password) - sftp = ssh.open_sftp() - dir_path = "" - # in case download_loc is absolute path like home/user/test/data.xml.gz - # we need to make this absolute path a relative one by removing first '/' - if os.path.isabs(download_loc): - download_loc = download_loc[1:] - if "/" in download_loc: - dir_path, _ = download_loc.rsplit("/", 1) - if dir_path != "": - # creates subfolders needed as per artifacts' folder structure - os.makedirs(dir_path, mode=0o777, exist_ok=True) - - response = "" - abs_download_loc = os.path.abspath(os.path.join(current_directory, download_loc)) + response = sftp.put(object_name, abs_download_loc) + # we can close sftp connection as we have already downloaded the file + sftp.close() + ssh.close() + if response: + return object_name, abs_download_loc, True + else: + return object_name, abs_download_loc, False + except Exception as e: + # this exception is for function sftp.put() + sftp.close() + ssh.close() + return object_name, abs_download_loc, False + + + def download_directory( + self, + host: str, + current_directory: str, + object_name: str, + download_loc: str, + ): + ssh = paramiko.SSHClient() + ssh.set_missing_host_key_policy( + paramiko.AutoAddPolicy() + ) # this can lead to man in the middle attack, need to find another solution + ssh.connect(host, username=self.user, password=self.password) + sftp = ssh.open_sftp() + dir_path = "" + # in case download_loc is absolute path like home/user/test/data.xml.gz + # we need to make this absolute path a relative one by removing first '/' + if os.path.isabs(download_loc): + download_loc = download_loc[1:] + if "/" in download_loc: + dir_path, _ = download_loc.rsplit("/", 1) + if dir_path != "": + # creates subfolders needed as per artifacts' folder structure + os.makedirs(dir_path, mode=0o777, exist_ok=True) + + response = "" + abs_download_loc = os.path.abspath(os.path.join(current_directory, download_loc)) - total_files_in_directory = 0 - file_download_success = 0 - download_success_return_code = 206 - download_failure_return_code = 207 - """" - if object_name ends with .dir - it is a directory. - we download .dir object with 'temp_dir' and remove - this after all the files from this .dir object is downloaded. + """" + if object_name ends with .dir - it is a directory. + we download .dir object with 'temp_dir' and remove + this after all the files from this .dir object is downloaded. + """ + # in case of .dir, abs_download_loc is a absolute path for a folder + os.makedirs(abs_download_loc, mode=0o777, exist_ok=True) + + # download .dir object + temp_dir = f"{abs_download_loc}/temp_dir" + try: + response = sftp.put(object_name, temp_dir) + with open(temp_dir, 'r') as file: + tracked_files = eval(file.read()) + + # removing temp_dir + if os.path.exists(temp_dir): + os.remove(temp_dir) + """ + object_name = /home/user/ssh-storage/files/md5/dd/2d792b7cf6efb02231f85c6147e403.dir + contains the path of the .dir on the artifact repo + we need to remove the hash of the .dir from the object_name + which will leave us with the artifact repo path """ - if object_name.endswith('.dir'): - # in case of .dir, abs_download_loc is a absolute path for a folder - os.makedirs(abs_download_loc, mode=0o777, exist_ok=True) + + repo_path = "/".join(object_name.split("/")[:-2]) - # download .dir object - temp_dir = f"{abs_download_loc}/temp_dir" + total_files_in_directory = 0 + files_downloaded = 0 + for file_info in tracked_files: + total_files_in_directory += 1 + relpath = file_info['relpath'] + md5_val = file_info['md5'] + # download_loc = /home/user/datatslice/example-get-started/test/artifacts/raw_data + # md5_val = a237457aa730c396e5acdbc5a64c8453 + # we need a2/37457aa730c396e5acdbc5a64c8453 + formatted_md5 = md5_val[:2] + '/' + md5_val[2:] + temp_download_loc = f"{abs_download_loc}/{relpath}" + temp_object_name = f"{repo_path}/{formatted_md5}" try: - response = sftp.put(object_name, temp_dir) - except Exception as e: - print(f"object {object_name} is not downloaded.") + obj = sftp.put(object_name, temp_download_loc) sftp.close() ssh.close() - return total_files_in_directory,file_download_success,download_failure_return_code - - with open(temp_dir, 'r') as file: - tracked_files = eval(file.read()) - - # removing temp_dir - if os.path.exists(temp_dir): - os.remove(temp_dir) - - """ - object_name = /home/user/ssh-storage/files/md5/dd/2d792b7cf6efb02231f85c6147e403.dir - contains the path of the .dir on the artifact repo - we need to remove the hash of the .dir from the object_name - which will leave us with the artifact repo path - """ - repo_path = "/".join(object_name.split("/")[:-2]) - for file_info in tracked_files: - total_files_in_directory += 1 - relpath = file_info['relpath'] - md5_val = file_info['md5'] - # download_loc = /home/user/datatslice/example-get-started/test/artifacts/raw_data - # md5_val = a237457aa730c396e5acdbc5a64c8453 - # we need a2/37457aa730c396e5acdbc5a64c8453 - formatted_md5 = md5_val[:2] + '/' + md5_val[2:] - temp_download_loc = f"{abs_download_loc}/{relpath}" - temp_object_name = f"{repo_path}/{formatted_md5}" - try: - obj = sftp.put(object_name, temp_download_loc) - if obj: - print(f"object {temp_object_name} downloaded at {temp_download_loc}.") - file_download_success += 1 - else: - file_download_failure += 1 - print(f"object {temp_object_name} is not downloaded.") - except Exception as e: + if obj: + files_downloaded += 1 + print(f"object {temp_object_name} downloaded at {temp_download_loc}.") + else: print(f"object {temp_object_name} is not downloaded.") - file_download_failure += 1 - if file_download_failure == 0: # if count_failed is 0 it means all the objects of directory are downloaded - response = True - else: # if count_failed is greater than 0 it means some artifacts or all are not downloaded - response = False - else: - try: - response = sftp.put(object_name, abs_download_loc) except Exception as e: - print(f"object {object_name} is not downloaded.") sftp.close() ssh.close() - return total_files_in_directory,file_download_success,download_failure_return_code - if response: - print(f"object {object_name} downloaded at {download_loc}.") - sftp.close() - ssh.close() - return total_files_in_directory,file_download_success, download_success_return_code - else: - print(f"object {object_name} is not downloaded.") - sftp.close() - ssh.close() - return total_files_in_directory,file_download_success,download_failure_return_code - except TypeError as exception: - return exception - except Exception as exception: - return exception + print(f"object {temp_object_name} is not downloaded.") + + # total_files - files_downloaded gives us the number of files which are failed to download + if (total_files_in_directory - files_downloaded) == 0: + return total_files_in_directory, files_downloaded, True + else: + return total_files_in_directory, files_downloaded, False + except Exception as e: + sftp.close() + ssh.close() + print(f"object {object_name} is not downloaded.") + # need to improve this + # We usually don't count .dir as a file while counting total_files_in_directory. + # However, here we failed to download the .dir folder itself. So we need to make + # total_files_in_directory = 1, because .............. + total_files_in_directory = 1 + return total_files_in_directory, files_downloaded, False + + + + + # except TypeError as exception: + # return exception + # except Exception as exception: + # return exception From fe2e3a1d5e51a7b589f40811078a595356cf3feb Mon Sep 17 00:00:00 2001 From: abhinav chobey Date: Mon, 16 Dec 2024 02:25:26 -0800 Subject: [PATCH 17/41] changed the method for handling cmf exception for ssh remote --- cmflib/commands/artifact/pull.py | 157 +++++++++++++----- cmflib/storage_backends/amazonS3_artifacts.py | 51 +++++- 2 files changed, 164 insertions(+), 44 deletions(-) diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index ac3c0ca0..58ff11db 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -221,18 +221,36 @@ def run(self): output = DvcConfig.get_dvc_config() # pulling dvc config if type(output) is not dict: return output + + """ + There are multiple scenarios for cmf artifact pull + Code checks if self.args.artifact_name is provided by user or not + under these conditions there are two more conditions + 1. if file is not .dir (single file) + Download single file + 2. else file is .dir (directory) + download all files from directory + + """ + dvc_config_op = output if dvc_config_op["core.remote"] == "minio": minio_class_obj = minio_artifacts.MinioArtifacts(dvc_config_op) - if self.args.artifact_name: #checking if artifact_name is in mlmd + # Check if a specific artifact name is provided as input. + if self.args.artifact_name: + # Search for the artifact in the metadata store. output = self.search_artifact(name_url_dict) - # output[0] = name + # output[0] = artifact_name # output[1] = url if output is None: raise ArtifactNotFound(self.args.artifact_name) else: + # Extract repository arguments specific to MinIO. minio_args = self.extract_repo_args("minio", output[0], output[1], current_directory) + + # Check if the object name doesn't end with `.dir` (indicating it's a file). if not minio_args[1].endswith(".dir"): + # Download a single file from MinIO. object_name, download_loc, download_flag = minio_class_obj.download_file( current_directory, minio_args[0], # bucket_name @@ -240,11 +258,12 @@ def run(self): minio_args[2], # path_name ) if download_flag: + # Return success if the file is downloaded successfully. return ObjectDownloadSuccess(object_name, download_loc) else: return ObjectDownloadFailure(object_name) else: - # we are downloading multiple files from a directory + # If object name ends with `.dir`, download multiple files from a directory # return total_files_in_directory, files_downloaded total_files_in_directory, dir_files_downloaded, download_flag = minio_class_obj.download_directory( current_directory, @@ -254,24 +273,34 @@ def run(self): ) if download_flag: + # Return success if all files in the directory are downloaded. return BatchDownloadSuccess(dir_files_downloaded) else: + # Calculate the number of files that failed to download. file_failed_to_download = total_files_in_directory - dir_files_downloaded return BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) + else: + # Handle the case where no specific artifact name is provided. files_downloaded = 0 files_failed_to_download = 0 + + # Iterate through the dictionary of artifact names and URLs. for name, url in name_url_dict.items(): - if not isinstance(url, str): + if not isinstance(url, str): ## Skip invalid URLs. continue minio_args = self.extract_repo_args("minio", name, url, current_directory) + + # Check if the object name doesn't end with `.dir` (indicating it's a file). if not minio_args[1].endswith(".dir"): + # Download a single file from MinIO. object_name, download_loc, download_flag = minio_class_obj.download_file( current_directory, minio_args[0], # bucket_name minio_args[1], # object_name minio_args[2], # path_name ) + # print output here because we are in a loop and can't return the control if download_flag: print(f"object {object_name} downloaded at {download_loc}.") @@ -280,13 +309,14 @@ def run(self): print(f"object {object_name} is not downloaded ") files_failed_to_download += 1 else: + # If object name ends with `.dir`, download multiple files from a directory. total_files_in_directory, dir_files_downloaded, download_flag = minio_class_obj.download_directory( current_directory, minio_args[0], # bucket_name minio_args[1], # object_name minio_args[2], # path_name ) - # download_flag is true only when all the files from the directory are successfully downlaoded. + # Return success if all files in the directory are downloaded. if download_flag: files_downloaded += dir_files_downloaded else: @@ -306,7 +336,9 @@ def run(self): # -a can be a dir or a file # Condition 2 - user can chose to download all the artifacts in one go. # we can have both dir and files in our list of artifacts + # Check if a specific artifact name is provided as input. if self.args.artifact_name: + # Search for the artifact in the metadata store. output = self.search_artifact(name_url_dict) # output[0] = name # output[1] = url @@ -314,30 +346,37 @@ def run(self): if output is None: raise ArtifactNotFound(self.args.artifact_name) else: + # Extract repository arguments specific to Local repo. local_args = self.extract_repo_args("local", output[0], output[1], current_directory) # local_args [0] = current_dvc_loc # local_args [1] = download_loc - + # Check if the object name doesn't end with `.dir` (indicating it's a file). if not local_args[0].endswith(".dir"): + # Download a single file from Local. object_name, download_loc, download_flag = local_class_obj.download_file(current_directory, local_args[0], local_args[1]) if download_flag: + # Return success if the file is downloaded successfully. return ObjectDownloadSuccess(object_name, download_loc) else: return ObjectDownloadFailure(object_name) else: - # we are downloading multiple files from a directory + # If object name ends with `.dir`, download multiple files from a directory # return total_files_in_directory, files_downloaded total_files_in_directory, dir_files_downloaded, download_flag = local_class_obj.download_directory(current_directory, local_args[0], local_args[1]) if download_flag: + # Return success if all files in the directory are downloaded. return BatchDownloadSuccess(dir_files_downloaded) else: + # Calculate the number of files that failed to download. file_failed_to_download = total_files_in_directory - dir_files_downloaded return BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) else: + # Handle the case where no specific artifact name is provided. files_downloaded = 0 files_failed_to_download = 0 + # Iterate through the dictionary of artifact names and URLs. for name, url in name_url_dict.items(): if not isinstance(url, str): continue @@ -350,10 +389,12 @@ def run(self): local_args = self.extract_repo_args("local", name, url, current_directory) # local_args [0] = current_dvc_loc # local_args [1] = download_loc - + # Check if the object name doesn't end with `.dir` (indicating it's a file). if not local_args[0].endswith(".dir"): + # Download a single file from Local repo. object_name, download_loc, download_flag = local_class_obj.download_file( current_directory, local_args[0], local_args[1]) + # print output here because we are in a loop and can't return the control if download_flag: print(f"object {object_name} downloaded at {download_loc}.") @@ -362,7 +403,7 @@ def run(self): print(f"object {object_name} is not downloaded ") files_failed_to_download += 1 else: - # we are downloading multiple files from a directory + # If object name ends with `.dir`, download multiple files from a directory. total_files_in_directory, dir_files_downloaded, download_flag = local_class_obj.download_directory( current_directory, local_args[0], local_args[1]) # download_flag is true only when all the files from the directory are successfully downlaoded. @@ -380,61 +421,93 @@ def run(self): files_downloaded, files_failed_to_download) elif dvc_config_op["core.remote"] == "ssh-storage": - sshremote_class_obj = sshremote_artifacts.SSHremoteArtifacts() + sshremote_class_obj = sshremote_artifacts.SSHremoteArtifacts(dvc_config_op) + # Check if a specific artifact name is provided as input. if self.args.artifact_name: + # Search for the artifact in the metadata store. output = self.search_artifact(name_url_dict) # output[0] = name # output[1] = url if output is None: raise ArtifactNotFound(self.args.artifact_name) else: + # Extract repository arguments specific to ssh-remote. args = self.extract_repo_args("ssh", output[0], output[1], current_directory) - total_files_in_directory,file_downloaded,return_code = sshremote_class_obj.download_artifacts( - dvc_config_op, - args[0], # host, - current_directory, - args[1], # remote_loc of the artifact - args[2] # name - ) - file_failed_to_download = total_files_in_directory - file_downloaded - if not args[0].endswith(".dir"): - if return_code == 206: - file_downloaded = 1 - else: - file_failed_to_download = 1 + # Check if the object name doesn't end with `.dir` (indicating it's a file). + if not args[1].endswith(".dir"): + # Download a single file from ssh-remote. + object_name, download_loc, download_flag = sshremote_class_obj.download_file( + args[0], # host, + current_directory, + args[1], # remote_loc of the artifact + args[2] # name + ) + if download_flag: + # Return success if the file is downloaded successfully. + return ObjectDownloadSuccess(object_name, download_loc) + else: + return ObjectDownloadFailure(object_name) - if return_code == 206: - status = BatchDownloadSuccess(file_downloaded) else: - status = BatchDownloadFailure(total_files_in_directory, file_failed_to_download) - return status + # If object name ends with `.dir`, download multiple files from a directory + # return total_files_in_directory, files_downloaded + total_files_in_directory, dir_files_downloaded, download_flag = sshremote_class_obj.download_directory( + args[0], # host, + current_directory, + args[1], # remote_loc of the artifact + args[2] # name + ) + if download_flag: + # Return success if all files in the directory are downloaded. + return BatchDownloadSuccess(dir_files_downloaded) + else: + # Calculate the number of files that failed to download. + file_failed_to_download = total_files_in_directory - dir_files_downloaded + return BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) else: + # Handle the case where no specific artifact name is provided. + files_downloaded = 0 + files_failed_to_download = 0 + # Iterate through the dictionary of artifact names and URLs. for name, url in name_url_dict.items(): - #print(name, url) if not isinstance(url, str): continue args = self.extract_repo_args("ssh", name, url, current_directory) + # Check if the object name doesn't end with `.dir` (indicating it's a file). if not args[1].endswith(".dir"): - total_files_count += 1 - total_files_in_dir,count_files_success,return_code = sshremote_class_obj.download_artifacts( - dvc_config_op, + # Download a single file from ssh-remote. + object_name, download_loc, download_flag = sshremote_class_obj.download_file( args[0], # host, current_directory, args[1], # remote_loc of the artifact args[2] # name ) - total_files_count += total_files_in_dir - files_download_completed += count_files_success - if return_code == 206 and not args[1].endswith(".dir") : - files_download_completed += 1 - files_downloaded = files_download_completed + count_files_success - Files_failed_to_download = total_files_in_dir + total_files_count - files_download_completed - count_files_success - if Files_failed_to_download == 0: - status = BatchDownloadSuccess(files_downloaded=files_downloaded) + # print output here because we are in a loop and can't return the control + if download_flag: + print(f"object {object_name} downloaded at {download_loc}.") + files_downloaded += 1 + else: + print(f"object {object_name} is not downloaded ") + files_failed_to_download += 1 + else: + # If object name ends with `.dir`, download multiple files from a directory. + total_files_in_directory, dir_files_downloaded, download_flag = sshremote_class_obj.download_directory( + args[0], # host, + current_directory, + args[1], # remote_loc of the artifact + args[2] # name + ) + if download_flag: + files_downloaded += dir_files_downloaded + else: + files_downloaded += dir_files_downloaded + files_failed_to_download += (total_files_in_directory - dir_files_downloaded) + + # we are assuming, if files_failed_to_download > 0, it means our download of artifacts is not success + if not files_failed_to_download: + return BatchDownloadSuccess(files_downloaded) else: - status = BatchDownloadFailure(files_downloaded=files_downloaded, Files_failed_to_download= Files_failed_to_download) - return status - + return BatchDownloadFailure(files_downloaded, files_failed_to_download) elif dvc_config_op["core.remote"] == "osdf": #Regenerate Token for OSDF from cmflib.utils.helper_functions import generate_osdf_token diff --git a/cmflib/storage_backends/amazonS3_artifacts.py b/cmflib/storage_backends/amazonS3_artifacts.py index 315f9bc4..c13f47c2 100644 --- a/cmflib/storage_backends/amazonS3_artifacts.py +++ b/cmflib/storage_backends/amazonS3_artifacts.py @@ -20,9 +20,17 @@ class AmazonS3Artifacts: def __init__(self, dvc_config_op): + """ + Initialize the AmazonS3Artifacts class with AWS credentials. + + Args: + dvc_config_op (dict): Dictionary containing AWS credentials (access key, secret key, and session token). + """ self.access_key = dvc_config_op["remote.amazons3.access_key_id"] self.secret_key = dvc_config_op["remote.amazons3.secret_access_key"] self.session_token = dvc_config_op["remote.amazons3.session_token"] + + # Create an S3 client with the provided credentials. self.s3 = boto3.client( 's3', aws_access_key_id = self.access_key, @@ -37,16 +45,35 @@ def download_file( object_name: str, download_loc: str, ): + """ + Download a single file from an S3 bucket. + + Args: + current_directory (str): The current working directory (unused here but might be useful in some contexts). + bucket_name (str): Name of the S3 bucket. + object_name (str): Key (path) of the file in the S3 bucket. + download_loc (str): Local path where the file should be downloaded. + + Returns: + tuple: (object_name, download_loc, status) where status indicates success (True) or failure (False). + """ try: response = "" + + # Check if the bucket exists. self.s3.head_bucket(Bucket=bucket_name) + # Create necessary directories for the download location. dir_path = "" if "/" in download_loc: dir_path, _ = download_loc.rsplit("/", 1) if dir_path != "": os.makedirs(dir_path, mode=0o777, exist_ok=True) # creating subfolders if needed + + # Download the file response = self.s3.download_file(bucket_name, object_name, download_loc) + + # Check if the response indicates success. if response == None: return object_name, download_loc, True else: @@ -60,7 +87,19 @@ def download_directory(self, object_name: str, download_loc: str, ): - #response = "" + """ + Download a directory from an S3 bucket using its .dir metadata object. + + Args: + current_directory (str): The current working directory (unused here but might be useful in some contexts). + bucket_name (str): Name of the S3 bucket. + object_name (str): Key (path) of the .dir object in the S3 bucket. + download_loc (str): Local directory path where the directory should be downloaded. + + Returns: + tuple: (total_files_in_directory, files_downloaded, status) where status indicates success (True) or failure (False). + """ + self.s3.head_bucket(Bucket=bucket_name) """" if object_name ends with .dir - it is a directory. @@ -76,12 +115,17 @@ def download_directory(self, os.makedirs(download_loc, mode=0o777, exist_ok=True) total_files_in_directory = 0 files_downloaded = 0 - # download .dir object + + # Temporary file to download the .dir metadata object. temp_dir = f"{download_loc}/temp_dir" try: + # Download the .dir file containing metadata about tracked files. response = self.s3.download_file(bucket_name, object_name, temp_dir) + + # Read the .dir metadata to get file information. with open(temp_dir, 'r') as file: tracked_files = eval(file.read()) + # removing temp_dir if os.path.exists(temp_dir): os.remove(temp_dir) @@ -112,12 +156,15 @@ def download_directory(self, print(f"object {temp_object_name} is not downloaded.") except Exception as e: print(f"object {temp_object_name} is not downloaded.") + + # Check if all files were successfully downloaded. if (total_files_in_directory - files_downloaded) == 0: return total_files_in_directory, files_downloaded, True else: return total_files_in_directory, files_downloaded, False except Exception as e: print(f"object {object_name} is not downloaded.") + # Handle failure to download the .dir metadata. # need to improve this # We usually don't count .dir as a file while counting total_files_in_directory. # However, here we failed to download the .dir folder itself. So we need to make From 482c1e03e177f81dc85291715ec3b2448c44ecfe Mon Sep 17 00:00:00 2001 From: abhinav chobey Date: Mon, 16 Dec 2024 02:48:48 -0800 Subject: [PATCH 18/41] added comments --- cmflib/storage_backends/amazonS3_artifacts.py | 8 +-- cmflib/storage_backends/local_artifacts.py | 50 +++++++++++++++--- cmflib/storage_backends/minio_artifacts.py | 52 ++++++++++++++++--- 3 files changed, 93 insertions(+), 17 deletions(-) diff --git a/cmflib/storage_backends/amazonS3_artifacts.py b/cmflib/storage_backends/amazonS3_artifacts.py index c13f47c2..3d4a4b4b 100644 --- a/cmflib/storage_backends/amazonS3_artifacts.py +++ b/cmflib/storage_backends/amazonS3_artifacts.py @@ -49,9 +49,9 @@ def download_file( Download a single file from an S3 bucket. Args: - current_directory (str): The current working directory (unused here but might be useful in some contexts). - bucket_name (str): Name of the S3 bucket. - object_name (str): Key (path) of the file in the S3 bucket. + current_directory (str): The current working directory. + bucket_name (str): Name of the s3 bucket. + object_name (str): Key (path) of the file in the s3 bucket. download_loc (str): Local path where the file should be downloaded. Returns: @@ -91,7 +91,7 @@ def download_directory(self, Download a directory from an S3 bucket using its .dir metadata object. Args: - current_directory (str): The current working directory (unused here but might be useful in some contexts). + current_directory (str): The current working directory . bucket_name (str): Name of the S3 bucket. object_name (str): Key (path) of the .dir object in the S3 bucket. download_loc (str): Local directory path where the directory should be downloaded. diff --git a/cmflib/storage_backends/local_artifacts.py b/cmflib/storage_backends/local_artifacts.py index 4ab19c15..9e8b1af9 100644 --- a/cmflib/storage_backends/local_artifacts.py +++ b/cmflib/storage_backends/local_artifacts.py @@ -19,11 +19,17 @@ from cmflib.cmf_exception_handling import ObjectDownloadSuccess class LocalArtifacts(): - # This class downloads one local artifact at a time and if the passed artifact is a directory - # then, it downloads all the files from the directory + """ + Initialize the LocalArtifacts class with local repo url. + This class downloads one local artifact at a time and if the passed artifact is a directory + then, it downloads all the files from the directory + Args: + dvc_config_op (dict): Dictionary containing local url (remote.local.url). + """ + def __init__(self, dvc_config_op): - self.fs = fs = DVCFileSystem( + self.fs = DVCFileSystem( dvc_config_op["remote.local-storage.url"] ) # dvc_config_op[1] is file system path - "/path/to/local/repository" @@ -33,8 +39,22 @@ def download_file( object_name: str, download_loc: str, ): + """ + Download a single file from an S3 bucket. + + Args: + current_directory (str): The current working directory. + bucket_name (str): Name of the local bucket. + object_name (str): Key (path) of the file in the local repo. + download_loc (str): Local path where the file should be downloaded. + + Returns: + tuple: (object_name, download_loc, status) where status indicates success (True) or failure (False). + """ # get_file() only creates file, to put artifacts in proper directory, subfolders are required. # download_loc = contains absolute path of the file with file name and extension + + # Create necessary directories for the download location. dir_path = "" if "/" in download_loc: dir_path, _ = download_loc.rsplit("/", 1) @@ -42,8 +62,11 @@ def download_file( os.makedirs(dir_path, mode=0o777, exist_ok=True) # creating subfolders if needed try: + # get_file() returns none when file gets downloaded. response = self.fs.get_file(object_name, download_loc) - if response == None: # get_file() returns none when file gets downloaded. + + # Check if the response indicates success. + if response == None: return object_name, download_loc, True else: return object_name, download_loc, False @@ -57,6 +80,19 @@ def download_directory( object_name: str, download_loc: str, ): + """ + Download a directory from an local repo using its .dir metadata object. + + Args: + current_directory (str): The current working directory . + bucket_name (str): Name of the local bucket. + object_name (str): Key (path) of the .dir object in the local bucket. + download_loc (str): Local directory path where the directory should be downloaded. + + Returns: + tuple: (total_files_in_directory, files_downloaded, status) where status indicates success (True) or failure (False). + """ + # get_file() only creates file, to put artifacts in proper directory, subfolders are required. # download_loc = contains absolute path of the file with file name and extension dir_path = "" @@ -74,11 +110,13 @@ def download_directory( os.makedirs(download_loc, mode=0o777, exist_ok=True) total_files_in_directory = 0 files_downloaded = 0 - # download the .dir object + + # Temporary file to download the .dir metadata object. temp_dir = f"{download_loc}/dir" try: - # we are getting .dir object which contains information about all the files tracked inside it + # Download the .dir file containing metadata about tracked files. response = self.fs.get_file(object_name, temp_dir) + with open(temp_dir, 'r') as file: tracked_files = eval(file.read()) diff --git a/cmflib/storage_backends/minio_artifacts.py b/cmflib/storage_backends/minio_artifacts.py index af27e6b1..edbc3f16 100644 --- a/cmflib/storage_backends/minio_artifacts.py +++ b/cmflib/storage_backends/minio_artifacts.py @@ -22,7 +22,13 @@ class MinioArtifacts: def __init__(self, dvc_config_op): - # dvc_config_op["remote.minio.endpointurl"] = http://XX.XX.XX.XX:9000 + """ + Initialize the MinioArtifacts class with minios3 repo credentials. + dvc_config_op["remote.minio.endpointurl"] = http://XX.XX.XX.XX:9000 + Args: + dvc_config_op (dict): Dictionary containing local url (remote.local.url). + """ + self.endpoint = dvc_config_op["remote.minio.endpointurl"].split("http://")[1] self.access_key = dvc_config_op["remote.minio.access_key_id"] self.secret_key = dvc_config_op["remote.minio.secret_access_key"] @@ -38,11 +44,29 @@ def download_file( object_name: str, download_loc: str, ): + """ + Download a single file from an S3 bucket. + + Args: + current_directory (str): The current working directory. + bucket_name (str): Name of the minioS3 bucket. + object_name (str): Key (path) of the file in the minios3 repo. + download_loc (str): Local path where the file should be downloaded. + + Returns: + tuple: (object_name, download_loc, status) where status indicates success (True) or failure (False). + """ try: found = self.client.bucket_exists(bucket_name) - if not found: #check if minio bucket exists + + #check if minio bucket exists + if not found: raise BucketNotFound(bucket_name) + + #Download file response = self.client.fget_object(bucket_name, object_name, download_loc) + + # Check if the response indicates success. if response: return object_name, download_loc, True else: @@ -58,6 +82,19 @@ def download_directory( object_name: str, download_loc: str, ): + """ + Download a directory from an minios3 repo using its .dir metadata object. + + Args: + current_directory (str): The current working directory . + bucket_name (str): Name of the minios3 bucket. + object_name (str): Key (path) of the .dir object in the minios3 bucket. + download_loc (str): Local directory path where the directory should be downloaded. + + Returns: + tuple: (total_files_in_directory, files_downloaded, status) where status indicates success (True) or failure (False). + """ + found = self.client.bucket_exists(bucket_name) if not found: #check if minio bucket exists raise BucketNotFound(bucket_name) @@ -73,14 +110,14 @@ def download_directory( total_files_in_directory = 0 files_downloaded = 0 - # download .dir object + + # Temporary file to download the .dir metadata object. temp_dir = f"{download_loc}/temp_dir" try: + # Download the .dir file containing metadata about tracked files. + response = self.client.fget_object(bucket_name, object_name, temp_dir) - # except Exception as e: - # print(f"object {object_name} is not downloaded.") - # return total_files_in_directory,file_download_success,download_failure_return_code - + with open(temp_dir, 'r') as file: tracked_files = eval(file.read()) @@ -119,6 +156,7 @@ def download_directory( print(f"object {temp_object_name} is not downloaded.") except Exception as e: print(f"object {temp_object_name} is not downloaded.") + # total_files - files_downloaded gives us the number of files which are failed to download if (total_files_in_directory - files_downloaded) == 0: return total_files_in_directory, files_downloaded, True From a173286387aeb99c606a9d0e2e67da635e30f196 Mon Sep 17 00:00:00 2001 From: abhinav chobey Date: Wed, 18 Dec 2024 06:08:33 -0800 Subject: [PATCH 19/41] added cmf exception and success handling also did testing --- cmflib/cmf_exception_handling.py | 52 +++++++++++++++++++++++++++--- cmflib/commands/artifact/list.py | 25 +++++++------- cmflib/commands/artifact/pull.py | 4 +-- cmflib/commands/execution/list.py | 26 +++++++-------- cmflib/commands/metadata/export.py | 25 +++++++------- cmflib/commands/pipeline/list.py | 13 ++++---- 6 files changed, 95 insertions(+), 50 deletions(-) diff --git a/cmflib/cmf_exception_handling.py b/cmflib/cmf_exception_handling.py index 949743f9..2aadcae4 100644 --- a/cmflib/cmf_exception_handling.py +++ b/cmflib/cmf_exception_handling.py @@ -15,6 +15,7 @@ ### #!/usr/bin/env python3 +from typing import Optional, List class CmfResponse(Exception): """ @@ -47,7 +48,7 @@ class ExecutionsAlreadyExists(CmfSuccess): def __init__(self, return_code=201): super().__init__(return_code) - def handle(): + def handle(self): return "INFO: Executions already exists." class ObjectDownloadSuccess(CmfSuccess): @@ -117,6 +118,26 @@ def __init__(self, message, return_code=209): def handle(self): return self.message +class MetadataExportToJson(CmfSuccess): + def __init__(self,full_path_to_dump, return_code=210): + self.full_path_to_dump = full_path_to_dump + super().__init__(return_code) + + def handle(self): + return f"SUCCESS: metadata successfully exported in {self.full_path_to_dump}." + +# This class is created for messages like "Done", "Records not found" +class Msg(CmfSuccess): + def __init__(self,msg_str: Optional[str] = None, msg_list: Optional[List[str]] = None, return_code=211): + self.msg_str = msg_str + self.msg_list = msg_list + super().__init__(return_code) + + def handle(self): + if self.msg_list != None: + return self.msg_list + else: + return self.msg_str ''' CMF FAILURE CLASSES''' @@ -260,12 +281,12 @@ def handle(self): return "ERROR: Unable to pull metadata file." class DirectoryNotfound(CmfFailure): - def __init__(self,current_dir, return_code=119): - self.current_dir = current_dir + def __init__(self,dir, return_code=119): + self.dir = dir super().__init__(return_code) def handle(self): - return f"ERROR: {self.current_dir} doesn't exists." + return f"ERROR: {self.dir} doesn't exists." class FileNameNotfound(CmfFailure): def __init__(self, return_code=120): @@ -288,3 +309,26 @@ def __init__(self, return_code=122): def handle(self): return "ERROR: Invalid tensorboard logs path. Provide valid file/folder path for tensorboard logs!!" +class MultipleArgumentNotAllowed(CmfFailure): + def __init__(self,argument_name, argument_flag, return_code=123): + self.argument_flag = argument_flag + self.argument_name = argument_name + super().__init__(return_code) + + def handle(self): + return f"Error: You can only provide one {self.argument_name} using the {self.argument_flag} flag." + +class MissingArgument(CmfFailure): + def __init__(self,argument_name,return_code=124): + self.argument_name = argument_name + super().__init__(return_code) + + def handle(self): + return f"Error: Missing {self.argument_name}" + +class NoChangesMadeError(CmfFailure): + def __init__(self,return_code=125): + super().__init__(return_code) + + def handle(self): + return "INFO: No changes made to the file. Operation aborted." \ No newline at end of file diff --git a/cmflib/commands/artifact/list.py b/cmflib/commands/artifact/list.py index 4a5aedf0..a9dc6f19 100644 --- a/cmflib/commands/artifact/list.py +++ b/cmflib/commands/artifact/list.py @@ -22,7 +22,7 @@ from tabulate import tabulate from cmflib.cli.command import CmdBase from cmflib import cmfquery -from cmflib.cmf_exception_handling import PipelineNotFound, FileNotFound, ArtifactNotFound +from cmflib.cmf_exception_handling import PipelineNotFound, FileNotFound, ArtifactNotFound, CmfNotConfigured, MultipleArgumentNotAllowed, MissingArgument, Msg from cmflib.dvc_wrapper import dvc_get_config from typing import Union, List @@ -133,7 +133,7 @@ def run(self): msg = "'cmf' is not configured.\nExecute 'cmf init' command." result = dvc_get_config() if len(result) == 0: - return msg + raise CmfNotConfigured(msg) # default path for mlmd file name mlmd_file_name = "./mlmd" @@ -141,25 +141,24 @@ def run(self): if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. elif len(self.args.file_name) > 1: # If the user provided more than one file name. - return "Error: You can only provide one file name using the -f flag." + raise MultipleArgumentNotAllowed("file_name", "-f") elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). - return "Error: Missing File name" + raise MissingArgument("file name") else: mlmd_file_name = self.args.file_name[0].strip() - current_directory = os.path.dirname(self.args.file_name) if mlmd_file_name == "mlmd": mlmd_file_name = "./mlmd" + current_directory = os.path.dirname(mlmd_file_name) if not os.path.exists(mlmd_file_name): raise FileNotFound(mlmd_file_name, current_directory) - # Creating cmfquery object. query = cmfquery.CmfQuery(mlmd_file_name) # Check if pipeline exists in mlmd. if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: - return "Error: You can only provide one pipeline name using the -p flag." + raise MultipleArgumentNotAllowed("pipeline_name", "-p") elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). - return "Error: Missing pipeline name" + raise MissingArgument("pipeline name") else: pipeline_name = self.args.pipeline_name[0] @@ -171,9 +170,9 @@ def run(self): if not self.args.artifact_name: # If self.args.artifact_name is None or an empty list ([]). pass elif len(self.args.artifact_name) > 1: # If the user provided more than one artifact_name. - return "Error: You can only provide one artifact name using the -a flag." + raise MultipleArgumentNotAllowed("artifact_name", "-a") elif not self.args.artifact_name[0]: # self.args.artifact_name[0] is an empty string (""). - return "Error: Missing artifact name" + raise MissingArgument("artifact name") else: artifact_ids = self.search_artifact(df) if(artifact_ids != -1): @@ -221,14 +220,14 @@ def run(self): user_input = input("Press Enter to see more records if exists or 'q' to quit: ").strip().lower() if user_input == 'q': break - return "End of records.." + return Msg(msg_str = "End of records..") else: - return ArtifactNotFound(self.args.artifact_name) + raise ArtifactNotFound(self.args.artifact_name) df = self.convert_to_datetime(df, "create_time_since_epoch") self.display_table(df) - return "Done." + return Msg(msg_str = "Done.") def add_parser(subparsers, parent_parser): diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index 417ca712..b226fcbd 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -182,9 +182,9 @@ def run(self): current_directory = os.path.dirname(mlmd_file_name) if not os.path.exists(mlmd_file_name): #checking if MLMD files exists raise FileNotFound(mlmd_file_name, current_directory) - if self.args.artifact_name == "": + if not self.args.artifact_name[0]: # checking if user has not given -a as "" raise ArtifactNotFound("") - if self.args.pipeline_name == "": #checking if pipeline_name is not "" + if not self.args.pipeline_name[0]: # checking if user has not given -p as "" raise PipelineNotFound(self.args.pipeline_name) query = cmfquery.CmfQuery(mlmd_file_name) if not query.get_pipeline_id(self.args.pipeline_name) > 0: #checking if pipeline name exists in mlmd diff --git a/cmflib/commands/execution/list.py b/cmflib/commands/execution/list.py index 5816d108..fb118f09 100644 --- a/cmflib/commands/execution/list.py +++ b/cmflib/commands/execution/list.py @@ -23,7 +23,7 @@ from cmflib import cmfquery from tabulate import tabulate from cmflib.dvc_wrapper import dvc_get_config - +from cmflib.cmf_exception_handling import PipelineNotFound, FileNotFound, CmfNotConfigured, MultipleArgumentNotAllowed, MissingArgument, Msg, ExecutionsNotFound class CmdExecutionList(CmdBase): def display_table(self, df: pd.DataFrame) -> None: @@ -78,15 +78,15 @@ def run(self): msg = "'cmf' is not configured.\nExecute 'cmf init' command." result = dvc_get_config() if len(result) == 0: - return msg + raise CmfNotConfigured(msg) current_directory = os.getcwd() if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. elif len(self.args.file_name) > 1: # If the user provided more than one file name. - return "Error: You can only provide one file name using the -f flag." + raise MultipleArgumentNotAllowed("file_name", "-f") elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). - return "Error: Missing File name" + raise MissingArgument("file name") else: mlmd_file_name = self.args.file_name[0].strip() if mlmd_file_name == "mlmd": @@ -94,16 +94,16 @@ def run(self): current_directory = os.path.dirname(mlmd_file_name) if not os.path.exists(mlmd_file_name): - return f"Error: {mlmd_file_name} doesn't exists in {current_directory} directory." + raise FileNotFound(mlmd_file_name, current_directory) # Creating cmfquery object. query = cmfquery.CmfQuery(mlmd_file_name) # Check if pipeline exists in mlmd. if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: - return "Error: You can only provide one pipeline name using the -p flag." + raise MultipleArgumentNotAllowed("pipeline_name", "-p") elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). - return "Error: Missing pipeline name" + raise MissingArgument("pipeline name") else: pipeline_name = self.args.pipeline_name[0] @@ -111,7 +111,7 @@ def run(self): # Check if the DataFrame is empty, indicating the pipeline name does not exist. if df.empty: - return "Pipeline does not exist.." + raise PipelineNotFound(pipeline_name) else: # Drop the 'Python_Env' column if it exists in the DataFrame. if "Python_Env" in df.columns: @@ -121,9 +121,9 @@ def run(self): if not self.args.execution_id: # If self.args.execution_id is None or an empty list ([]). pass elif len(self.args.execution_id) > 1: # If the user provided more than one execution_id. - return "Error: You can only provide one execution id using the -e flag." + raise MultipleArgumentNotAllowed("execution_id", "-e") elif not self.args.execution_id[0]: # self.args.execution_id[0] is an empty string (""). - return "Error: Missing execution id" + raise MissingArgument("execution id") else: if self.args.execution_id[0].isdigit(): if int(self.args.execution_id[0]) in list(df['id']): # Converting series to list. @@ -157,11 +157,11 @@ def run(self): ) print(table) print() - return "Done" - return "Execution id does not exist.." + return Msg(msg_str = "Done.") + raise ExecutionsNotFound(self.args.execution_id[0]) self.display_table(df) - return "Done" + return Msg(msg_str = "Done.") def add_parser(subparsers, parent_parser): diff --git a/cmflib/commands/metadata/export.py b/cmflib/commands/metadata/export.py index 332febc6..b6e824a5 100644 --- a/cmflib/commands/metadata/export.py +++ b/cmflib/commands/metadata/export.py @@ -22,6 +22,7 @@ from cmflib import cmfquery from cmflib.cli.command import CmdBase from cmflib.dvc_wrapper import dvc_get_config +from cmflib.cmf_exception_handling import PipelineNotFound, FileNotFound, CmfNotConfigured, MultipleArgumentNotAllowed, MissingArgument, Msg, NoChangesMadeError, MetadataExportToJson # This class export local mlmd data to a json file class CmdMetadataExport(CmdBase): @@ -44,7 +45,7 @@ def run(self): msg = "'cmf' is not configured.\nExecute 'cmf init' command." result = dvc_get_config() if len(result) == 0: - return msg + raise CmfNotConfigured(msg) current_directory = os.getcwd() full_path_to_dump = "" @@ -52,9 +53,9 @@ def run(self): if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. elif len(self.args.file_name) > 1: # If the user provided more than one file name. - return "Error: You can only provide one file name using the -f flag." + raise MultipleArgumentNotAllowed("file_name", "-f") elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). - return "Error: Missing File name" + raise MissingArgument("file name") else: mlmd_file_name = self.args.file_name[0].strip() # Removing starting and ending whitespaces. if mlmd_file_name == "mlmd": @@ -62,16 +63,16 @@ def run(self): current_directory = os.path.dirname(mlmd_file_name) if not os.path.exists(mlmd_file_name): - return f"Error: {mlmd_file_name} doesn't exists in {current_directory} directory." + raise FileNotFound(mlmd_file_name, current_directory) # Initialising cmfquery class. query = cmfquery.CmfQuery(mlmd_file_name) # Check if pipeline exists in mlmd . if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: - return "Error: You can only provide one pipeline name using the -p flag." + raise MultipleArgumentNotAllowed("pipeline_name", "-p") elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). - return "Error: Missing pipeline name" + raise MissingArgument("pipeline name") else: pipeline_name = self.args.pipeline_name[0] @@ -81,9 +82,9 @@ def run(self): if not self.args.json_file_name: # If self.args.json_file_name is None or an empty list ([]). json_file_name = self.args.json_file_name elif len(self.args.json_file_name) > 1: # If the user provided more than one json file name. - return "Error: You can provide only one json file name using the -j flag." + raise MultipleArgumentNotAllowed("json file", "-j") elif not self.args.json_file_name[0]: # self.args.json_file_name[0] is an empty string (""). - return "Error: Missing Json file name" + raise MissingArgument("json file") else: json_file_name = self.args.json_file_name[0].strip() @@ -96,7 +97,7 @@ def run(self): if userRespone.lower() == "yes": # Overwrite file. full_path_to_dump = self.create_full_path(current_directory, json_file_name) else: - return "No changes made to the file. Operation aborted." + raise NoChangesMadeError() else: full_path_to_dump = self.create_full_path(current_directory, json_file_name) else: @@ -106,7 +107,7 @@ def run(self): if userRespone.lower() == "yes": full_path_to_dump = os.getcwd() + f"/{pipeline_name}.json" else: - return "No changes made to the file. Operation aborted." + raise NoChangesMadeError() else: full_path_to_dump = os.getcwd() + f"/{pipeline_name}.json" @@ -116,9 +117,9 @@ def run(self): # Write metadata into json file. with open(full_path_to_dump, 'w') as f: f.write(json.dumps(json.loads(json_payload),indent=2)) - return f"SUCCESS: metadata successfully exported in {full_path_to_dump}." + return MetadataExportToJson(full_path_to_dump) else: - return f"{pipeline_name} doesn't exists in {mlmd_file_name}!!" + raise PipelineNotFound(pipeline_name) diff --git a/cmflib/commands/pipeline/list.py b/cmflib/commands/pipeline/list.py index fe597870..04f15992 100644 --- a/cmflib/commands/pipeline/list.py +++ b/cmflib/commands/pipeline/list.py @@ -20,6 +20,7 @@ from cmflib.cli.command import CmdBase from cmflib import cmfquery from cmflib.dvc_wrapper import dvc_get_config +from cmflib.cmf_exception_handling import FileNotFound, CmfNotConfigured, MultipleArgumentNotAllowed, MissingArgument, Msg class CmdPipelineList(CmdBase): def run(self): @@ -27,15 +28,15 @@ def run(self): msg = "'cmf' is not configured.\nExecute 'cmf init' command." result = dvc_get_config() if len(result) == 0: - return msg + raise CmfNotConfigured(msg) current_directory = os.getcwd() if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. elif len(self.args.file_name) > 1: # If the user provided more than one file name. - return "Error: You can only provide one file name using the -f flag." + raise MultipleArgumentNotAllowed("file_name", "-f") elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). - return "Error: Missing File name" + raise MissingArgument("file name") else: mlmd_file_name = self.args.file_name[0].strip() if mlmd_file_name == "mlmd": @@ -43,12 +44,12 @@ def run(self): current_directory = os.path.dirname(mlmd_file_name) if not os.path.exists(mlmd_file_name): - return f"Error: {mlmd_file_name} doesn't exists in {current_directory} directory." + raise FileNotFound(mlmd_file_name, current_directory) # Creating cmfquery object. query = cmfquery.CmfQuery(mlmd_file_name) - - return [pipeline.name for pipeline in query._get_pipelines()] + + return Msg(msg_list = [pipeline.name for pipeline in query._get_pipelines()]) def add_parser(subparsers, parent_parser): From 9c15ad616e86b8669b2c727dce82ba625fa12506 Mon Sep 17 00:00:00 2001 From: abhinav chobey Date: Fri, 20 Dec 2024 01:21:31 -0800 Subject: [PATCH 20/41] update --- cmflib/cmf_exception_handling.py | 20 ++++++++++++++++---- cmflib/commands/artifact/list.py | 12 ++++++------ cmflib/commands/artifact/pull.py | 19 ++++++++++++------- cmflib/commands/artifact/push.py | 9 +++++++-- cmflib/commands/execution/list.py | 12 ++++++------ cmflib/commands/metadata/export.py | 12 ++++++------ cmflib/commands/pipeline/list.py | 6 +++--- cmflib/storage_backends/osdf_artifacts.py | 11 +++++------ 8 files changed, 61 insertions(+), 40 deletions(-) diff --git a/cmflib/cmf_exception_handling.py b/cmflib/cmf_exception_handling.py index 2aadcae4..14958378 100644 --- a/cmflib/cmf_exception_handling.py +++ b/cmflib/cmf_exception_handling.py @@ -127,7 +127,7 @@ def handle(self): return f"SUCCESS: metadata successfully exported in {self.full_path_to_dump}." # This class is created for messages like "Done", "Records not found" -class Msg(CmfSuccess): +class MsgSuccess(CmfSuccess): def __init__(self,msg_str: Optional[str] = None, msg_list: Optional[List[str]] = None, return_code=211): self.msg_str = msg_str self.msg_list = msg_list @@ -309,7 +309,7 @@ def __init__(self, return_code=122): def handle(self): return "ERROR: Invalid tensorboard logs path. Provide valid file/folder path for tensorboard logs!!" -class MultipleArgumentNotAllowed(CmfFailure): +class DuplicateArgumentNotAllowed(CmfFailure): def __init__(self,argument_name, argument_flag, return_code=123): self.argument_flag = argument_flag self.argument_name = argument_name @@ -326,9 +326,21 @@ def __init__(self,argument_name,return_code=124): def handle(self): return f"Error: Missing {self.argument_name}" -class NoChangesMadeError(CmfFailure): +class NoChangesMadeInfo(CmfFailure): def __init__(self,return_code=125): super().__init__(return_code) def handle(self): - return "INFO: No changes made to the file. Operation aborted." \ No newline at end of file + return "INFO: No changes made to the file. Operation aborted." + +class MsgFailure(CmfFailure): + def __init__(self,msg_str: Optional[str] = None, msg_list: Optional[List[str]] = None, return_code=126): + self.msg_str = msg_str + self.msg_list = msg_list + super().__init__(return_code) + + def handle(self): + if self.msg_list != None: + return self.msg_list + else: + return self.msg_str diff --git a/cmflib/commands/artifact/list.py b/cmflib/commands/artifact/list.py index a9dc6f19..97ea278f 100644 --- a/cmflib/commands/artifact/list.py +++ b/cmflib/commands/artifact/list.py @@ -22,7 +22,7 @@ from tabulate import tabulate from cmflib.cli.command import CmdBase from cmflib import cmfquery -from cmflib.cmf_exception_handling import PipelineNotFound, FileNotFound, ArtifactNotFound, CmfNotConfigured, MultipleArgumentNotAllowed, MissingArgument, Msg +from cmflib.cmf_exception_handling import PipelineNotFound, FileNotFound, ArtifactNotFound, CmfNotConfigured, DuplicateArgumentNotAllowed, MissingArgument, MsgSuccess from cmflib.dvc_wrapper import dvc_get_config from typing import Union, List @@ -141,7 +141,7 @@ def run(self): if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. elif len(self.args.file_name) > 1: # If the user provided more than one file name. - raise MultipleArgumentNotAllowed("file_name", "-f") + raise DuplicateArgumentNotAllowed("file_name", "-f") elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). raise MissingArgument("file name") else: @@ -156,7 +156,7 @@ def run(self): # Check if pipeline exists in mlmd. if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: - raise MultipleArgumentNotAllowed("pipeline_name", "-p") + raise DuplicateArgumentNotAllowed("pipeline_name", "-p") elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). raise MissingArgument("pipeline name") else: @@ -170,7 +170,7 @@ def run(self): if not self.args.artifact_name: # If self.args.artifact_name is None or an empty list ([]). pass elif len(self.args.artifact_name) > 1: # If the user provided more than one artifact_name. - raise MultipleArgumentNotAllowed("artifact_name", "-a") + raise DuplicateArgumentNotAllowed("artifact_name", "-a") elif not self.args.artifact_name[0]: # self.args.artifact_name[0] is an empty string (""). raise MissingArgument("artifact name") else: @@ -220,14 +220,14 @@ def run(self): user_input = input("Press Enter to see more records if exists or 'q' to quit: ").strip().lower() if user_input == 'q': break - return Msg(msg_str = "End of records..") + return MsgSuccess(msg_str = "End of records..") else: raise ArtifactNotFound(self.args.artifact_name) df = self.convert_to_datetime(df, "create_time_since_epoch") self.display_table(df) - return Msg(msg_str = "Done.") + return MsgSuccess(msg_str = "Done.") def add_parser(subparsers, parent_parser): diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index b226fcbd..d6b14971 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -36,7 +36,9 @@ BatchDownloadFailure, BatchDownloadSuccess, ObjectDownloadFailure, - ObjectDownloadSuccess + ObjectDownloadSuccess, + MsgSuccess, + MsgFailure ) from cmflib.cli.utils import check_minio_server @@ -543,7 +545,7 @@ def run(self): raise ArtifactNotFound(self.args.artifact_name) else: args = self.extract_repo_args("osdf", output[0], output[1], current_directory) - return_code = osdfremote_class_obj.download_artifacts( + download_flag, message = osdfremote_class_obj.download_artifacts( dvc_config_op, args[0], # s_url of the artifact cache_path, @@ -553,10 +555,10 @@ def run(self): output[3] #Artifact Hash ) - if return_code == 206: - status = ObjectDownloadSuccess(args[0],args[1]) + if download_flag : + status = MsgSuccess(msg_str = message) else: - status = ObjectDownloadFailure(args[0],args[1]) + status = MsgFailure(msg_str = message) return status else: for name, url in name_url_dict.items(): @@ -568,7 +570,7 @@ def run(self): #print(f"Hash for the artifact {name} is {artifact_hash}") args = self.extract_repo_args("osdf", name, url, current_directory) - return_code = osdfremote_class_obj.download_artifacts( + download_flag, message = osdfremote_class_obj.download_artifacts( dvc_config_op, args[0], # host, cache_path, @@ -577,8 +579,11 @@ def run(self): args[2], # name artifact_hash #Artifact Hash ) - if return_code == 206: + if download_flag: + print(message) #### success message file_downloaded +=1 + else: + print(message) ### failure message Files_failed_to_download = total_files_count - files_downloaded if Files_failed_to_download == 0: status = BatchDownloadSuccess(files_downloaded=files_downloaded) diff --git a/cmflib/commands/artifact/push.py b/cmflib/commands/artifact/push.py index f75ce566..1cfa1a82 100644 --- a/cmflib/commands/artifact/push.py +++ b/cmflib/commands/artifact/push.py @@ -35,17 +35,22 @@ def run(self): result = "" dvc_config_op = DvcConfig.get_dvc_config() cmf_config_file = os.environ.get("CONFIG_FILE", ".cmfconfig") + # find root_dir of .cmfconfig output = find_root(cmf_config_file) + # in case, there is no .cmfconfig file if output.find("'cmf' is not configured.") != -1: raise CmfNotConfigured(output) - cmf_config={} - cmf_config=CmfConfig.read_config(cmf_config_file) + + out_msg = check_minio_server(dvc_config_op) if dvc_config_op["core.remote"] == "minio" and out_msg != "SUCCESS": raise Minios3ServerInactive() if dvc_config_op["core.remote"] == "osdf": + config_file_path = os.path.join(output, cmf_config_file) + cmf_config={} + cmf_config=CmfConfig.read_config(config_file_path) #print("key_id="+cmf_config["osdf-key_id"]) dynamic_password = generate_osdf_token(cmf_config["osdf-key_id"],cmf_config["osdf-key_path"],cmf_config["osdf-key_issuer"]) #print("Dynamic Password"+dynamic_password) diff --git a/cmflib/commands/execution/list.py b/cmflib/commands/execution/list.py index fb118f09..9c1c8834 100644 --- a/cmflib/commands/execution/list.py +++ b/cmflib/commands/execution/list.py @@ -23,7 +23,7 @@ from cmflib import cmfquery from tabulate import tabulate from cmflib.dvc_wrapper import dvc_get_config -from cmflib.cmf_exception_handling import PipelineNotFound, FileNotFound, CmfNotConfigured, MultipleArgumentNotAllowed, MissingArgument, Msg, ExecutionsNotFound +from cmflib.cmf_exception_handling import PipelineNotFound, FileNotFound, CmfNotConfigured, DuplicateArgumentNotAllowed, MissingArgument, MsgSuccess, ExecutionsNotFound class CmdExecutionList(CmdBase): def display_table(self, df: pd.DataFrame) -> None: @@ -84,7 +84,7 @@ def run(self): if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. elif len(self.args.file_name) > 1: # If the user provided more than one file name. - raise MultipleArgumentNotAllowed("file_name", "-f") + raise DuplicateArgumentNotAllowed("file_name", "-f") elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). raise MissingArgument("file name") else: @@ -101,7 +101,7 @@ def run(self): # Check if pipeline exists in mlmd. if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: - raise MultipleArgumentNotAllowed("pipeline_name", "-p") + raise DuplicateArgumentNotAllowed("pipeline_name", "-p") elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). raise MissingArgument("pipeline name") else: @@ -121,7 +121,7 @@ def run(self): if not self.args.execution_id: # If self.args.execution_id is None or an empty list ([]). pass elif len(self.args.execution_id) > 1: # If the user provided more than one execution_id. - raise MultipleArgumentNotAllowed("execution_id", "-e") + raise DuplicateArgumentNotAllowed("execution_id", "-e") elif not self.args.execution_id[0]: # self.args.execution_id[0] is an empty string (""). raise MissingArgument("execution id") else: @@ -157,11 +157,11 @@ def run(self): ) print(table) print() - return Msg(msg_str = "Done.") + return MsgSuccess(msg_str = "Done.") raise ExecutionsNotFound(self.args.execution_id[0]) self.display_table(df) - return Msg(msg_str = "Done.") + return MsgSuccess(msg_str = "Done.") def add_parser(subparsers, parent_parser): diff --git a/cmflib/commands/metadata/export.py b/cmflib/commands/metadata/export.py index b6e824a5..a2c21177 100644 --- a/cmflib/commands/metadata/export.py +++ b/cmflib/commands/metadata/export.py @@ -22,7 +22,7 @@ from cmflib import cmfquery from cmflib.cli.command import CmdBase from cmflib.dvc_wrapper import dvc_get_config -from cmflib.cmf_exception_handling import PipelineNotFound, FileNotFound, CmfNotConfigured, MultipleArgumentNotAllowed, MissingArgument, Msg, NoChangesMadeError, MetadataExportToJson +from cmflib.cmf_exception_handling import PipelineNotFound, FileNotFound, CmfNotConfigured, DuplicateArgumentNotAllowed, MissingArgument,NoChangesMadeInfo, MetadataExportToJson # This class export local mlmd data to a json file class CmdMetadataExport(CmdBase): @@ -53,7 +53,7 @@ def run(self): if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. elif len(self.args.file_name) > 1: # If the user provided more than one file name. - raise MultipleArgumentNotAllowed("file_name", "-f") + raise DuplicateArgumentNotAllowed("file_name", "-f") elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). raise MissingArgument("file name") else: @@ -70,7 +70,7 @@ def run(self): # Check if pipeline exists in mlmd . if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: - raise MultipleArgumentNotAllowed("pipeline_name", "-p") + raise DuplicateArgumentNotAllowed("pipeline_name", "-p") elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). raise MissingArgument("pipeline name") else: @@ -82,7 +82,7 @@ def run(self): if not self.args.json_file_name: # If self.args.json_file_name is None or an empty list ([]). json_file_name = self.args.json_file_name elif len(self.args.json_file_name) > 1: # If the user provided more than one json file name. - raise MultipleArgumentNotAllowed("json file", "-j") + raise DuplicateArgumentNotAllowed("json file", "-j") elif not self.args.json_file_name[0]: # self.args.json_file_name[0] is an empty string (""). raise MissingArgument("json file") else: @@ -97,7 +97,7 @@ def run(self): if userRespone.lower() == "yes": # Overwrite file. full_path_to_dump = self.create_full_path(current_directory, json_file_name) else: - raise NoChangesMadeError() + raise NoChangesMadeInfo() else: full_path_to_dump = self.create_full_path(current_directory, json_file_name) else: @@ -107,7 +107,7 @@ def run(self): if userRespone.lower() == "yes": full_path_to_dump = os.getcwd() + f"/{pipeline_name}.json" else: - raise NoChangesMadeError() + raise NoChangesMadeInfo() else: full_path_to_dump = os.getcwd() + f"/{pipeline_name}.json" diff --git a/cmflib/commands/pipeline/list.py b/cmflib/commands/pipeline/list.py index 04f15992..1b195cd6 100644 --- a/cmflib/commands/pipeline/list.py +++ b/cmflib/commands/pipeline/list.py @@ -20,7 +20,7 @@ from cmflib.cli.command import CmdBase from cmflib import cmfquery from cmflib.dvc_wrapper import dvc_get_config -from cmflib.cmf_exception_handling import FileNotFound, CmfNotConfigured, MultipleArgumentNotAllowed, MissingArgument, Msg +from cmflib.cmf_exception_handling import FileNotFound, CmfNotConfigured, DuplicateArgumentNotAllowed, MissingArgument, MsgSuccess class CmdPipelineList(CmdBase): def run(self): @@ -34,7 +34,7 @@ def run(self): if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. elif len(self.args.file_name) > 1: # If the user provided more than one file name. - raise MultipleArgumentNotAllowed("file_name", "-f") + raise DuplicateArgumentNotAllowed("file_name", "-f") elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). raise MissingArgument("file name") else: @@ -49,7 +49,7 @@ def run(self): # Creating cmfquery object. query = cmfquery.CmfQuery(mlmd_file_name) - return Msg(msg_list = [pipeline.name for pipeline in query._get_pipelines()]) + return MsgSuccess(msg_list = [pipeline.name for pipeline in query._get_pipelines()]) def add_parser(subparsers, parent_parser): diff --git a/cmflib/storage_backends/osdf_artifacts.py b/cmflib/storage_backends/osdf_artifacts.py index 21828e0e..5bfb0b98 100644 --- a/cmflib/storage_backends/osdf_artifacts.py +++ b/cmflib/storage_backends/osdf_artifacts.py @@ -18,7 +18,6 @@ import requests #import urllib3 #urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) -from cmflib.cmf_exception_handling import NoDataFoundOsdf import hashlib import time from urllib.parse import urlparse @@ -130,10 +129,10 @@ def download_artifacts( success, result = download_and_verify_file(host, headers, remote_file_path, local_file_path, artifact_hash, timeout=10) if success: #print(result) - return result + return success, result else: #print(f"Failed to download and verify file: {result}") - return f"Failed to download and verify file" + return success, f"Failed to download and verify file: {result}" else: #Generate Cached path for artifact cached_s_url=generate_cached_url(host,cache) @@ -141,7 +140,7 @@ def download_artifacts( success, cached_result = download_and_verify_file(cached_s_url, headers, remote_file_path, local_path, artifact_hash,timeout=5) if success: #print(cached_result) - return cached_result + return success, cached_result else: print(f"Failed to download and verify file from cache: {cached_result}") print(f"Trying Origin at {host}") @@ -149,10 +148,10 @@ def download_artifacts( success, origin_result = download_and_verify_file(host, headers, remote_file_path, local_path, artifact_hash, timeout=10) if success: #print(origin_result) - return origin_result + return success, origin_result else: #print(f"Failed to download and verify file: {result}") - return f"Failed to download and verify file" + return success, f"Failed to download and verify file: {origin_result}" From 355ef8d2a4500765ea1b23bb5cf2a578cbd5bb69 Mon Sep 17 00:00:00 2001 From: Varkha Sharma Date: Fri, 20 Dec 2024 02:16:44 -0800 Subject: [PATCH 21/41] made some review changes --- cmflib/commands/artifact/list.py | 17 +++++++++-------- cmflib/commands/execution/list.py | 17 +++++++++-------- cmflib/commands/metadata/export.py | 16 ++++++++-------- cmflib/commands/pipeline/list.py | 14 ++++++-------- cmflib/storage_backends/amazonS3_artifacts.py | 1 + cmflib/storage_backends/local_artifacts.py | 1 - 6 files changed, 33 insertions(+), 33 deletions(-) diff --git a/cmflib/commands/artifact/list.py b/cmflib/commands/artifact/list.py index 97ea278f..9670d040 100644 --- a/cmflib/commands/artifact/list.py +++ b/cmflib/commands/artifact/list.py @@ -20,11 +20,17 @@ import textwrap from tabulate import tabulate +from typing import Union, List from cmflib.cli.command import CmdBase from cmflib import cmfquery -from cmflib.cmf_exception_handling import PipelineNotFound, FileNotFound, ArtifactNotFound, CmfNotConfigured, DuplicateArgumentNotAllowed, MissingArgument, MsgSuccess -from cmflib.dvc_wrapper import dvc_get_config -from typing import Union, List +from cmflib.cmf_exception_handling import ( + PipelineNotFound, + FileNotFound, + ArtifactNotFound, + DuplicateArgumentNotAllowed, + MissingArgument, + MsgSuccess +) class CmdArtifactsList(CmdBase): def convert_to_datetime(self, df: pd.DataFrame, col_name: str) -> pd.DataFrame: @@ -129,11 +135,6 @@ def search_artifact(self, df: pd.DataFrame) -> Union[int, List[int]]: return -1 def run(self): - # Check if 'cmf' is configured. - msg = "'cmf' is not configured.\nExecute 'cmf init' command." - result = dvc_get_config() - if len(result) == 0: - raise CmfNotConfigured(msg) # default path for mlmd file name mlmd_file_name = "./mlmd" diff --git a/cmflib/commands/execution/list.py b/cmflib/commands/execution/list.py index 9c1c8834..4771c777 100644 --- a/cmflib/commands/execution/list.py +++ b/cmflib/commands/execution/list.py @@ -22,8 +22,15 @@ from cmflib.cli.command import CmdBase from cmflib import cmfquery from tabulate import tabulate -from cmflib.dvc_wrapper import dvc_get_config -from cmflib.cmf_exception_handling import PipelineNotFound, FileNotFound, CmfNotConfigured, DuplicateArgumentNotAllowed, MissingArgument, MsgSuccess, ExecutionsNotFound +from cmflib.cmf_exception_handling import ( + PipelineNotFound, + FileNotFound, + DuplicateArgumentNotAllowed, + MissingArgument, + MsgSuccess, + ExecutionsNotFound +) + class CmdExecutionList(CmdBase): def display_table(self, df: pd.DataFrame) -> None: @@ -74,12 +81,6 @@ def display_table(self, df: pd.DataFrame) -> None: start_index = end_index def run(self): - # Check if 'cmf' is configured - msg = "'cmf' is not configured.\nExecute 'cmf init' command." - result = dvc_get_config() - if len(result) == 0: - raise CmfNotConfigured(msg) - current_directory = os.getcwd() if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. diff --git a/cmflib/commands/metadata/export.py b/cmflib/commands/metadata/export.py index a2c21177..3fdbaf06 100644 --- a/cmflib/commands/metadata/export.py +++ b/cmflib/commands/metadata/export.py @@ -21,8 +21,14 @@ from cmflib import cmfquery from cmflib.cli.command import CmdBase -from cmflib.dvc_wrapper import dvc_get_config -from cmflib.cmf_exception_handling import PipelineNotFound, FileNotFound, CmfNotConfigured, DuplicateArgumentNotAllowed, MissingArgument,NoChangesMadeInfo, MetadataExportToJson +from cmflib.cmf_exception_handling import ( + PipelineNotFound, + FileNotFound, + DuplicateArgumentNotAllowed, + MissingArgument, + NoChangesMadeInfo, + MetadataExportToJson +) # This class export local mlmd data to a json file class CmdMetadataExport(CmdBase): @@ -41,12 +47,6 @@ def create_full_path(self, current_directory: str, json_file_name: str) -> str: return "Provide path with file name." def run(self): - # Check if 'cmf' is configured. - msg = "'cmf' is not configured.\nExecute 'cmf init' command." - result = dvc_get_config() - if len(result) == 0: - raise CmfNotConfigured(msg) - current_directory = os.getcwd() full_path_to_dump = "" diff --git a/cmflib/commands/pipeline/list.py b/cmflib/commands/pipeline/list.py index 1b195cd6..511a66c7 100644 --- a/cmflib/commands/pipeline/list.py +++ b/cmflib/commands/pipeline/list.py @@ -19,17 +19,15 @@ from cmflib.cli.command import CmdBase from cmflib import cmfquery -from cmflib.dvc_wrapper import dvc_get_config -from cmflib.cmf_exception_handling import FileNotFound, CmfNotConfigured, DuplicateArgumentNotAllowed, MissingArgument, MsgSuccess +from cmflib.cmf_exception_handling import ( + FileNotFound, + DuplicateArgumentNotAllowed, + MissingArgument, + MsgSuccess +) class CmdPipelineList(CmdBase): def run(self): - # Check if 'cmf' is configured. - msg = "'cmf' is not configured.\nExecute 'cmf init' command." - result = dvc_get_config() - if len(result) == 0: - raise CmfNotConfigured(msg) - current_directory = os.getcwd() if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. diff --git a/cmflib/storage_backends/amazonS3_artifacts.py b/cmflib/storage_backends/amazonS3_artifacts.py index 3d4a4b4b..b7abd0cc 100644 --- a/cmflib/storage_backends/amazonS3_artifacts.py +++ b/cmflib/storage_backends/amazonS3_artifacts.py @@ -172,6 +172,7 @@ def download_directory(self, total_files_in_directory = 1 return total_files_in_directory, files_downloaded, False + # this will never be called except self.s3.exceptions.ClientError as e: # If a specific error code is returned, the bucket does not exist if e.response['Error']['Code'] == '404': diff --git a/cmflib/storage_backends/local_artifacts.py b/cmflib/storage_backends/local_artifacts.py index 9e8b1af9..0fc2c831 100644 --- a/cmflib/storage_backends/local_artifacts.py +++ b/cmflib/storage_backends/local_artifacts.py @@ -16,7 +16,6 @@ import os from dvc.api import DVCFileSystem -from cmflib.cmf_exception_handling import ObjectDownloadSuccess class LocalArtifacts(): """ From 0dc64a6d63877027b3bdc8fa1003102fcc85b6a4 Mon Sep 17 00:00:00 2001 From: abhinav chobey Date: Fri, 20 Dec 2024 04:06:54 -0800 Subject: [PATCH 22/41] update --- cmflib/commands/artifact/pull.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index d6b14971..f91fd26b 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -31,12 +31,14 @@ from cmflib.cmf_exception_handling import ( PipelineNotFound, FileNotFound, + MissingArgument, ExecutionsNotFound, ArtifactNotFound, BatchDownloadFailure, BatchDownloadSuccess, ObjectDownloadFailure, ObjectDownloadSuccess, + DuplicateArgumentNotAllowed, MsgSuccess, MsgFailure ) @@ -184,14 +186,23 @@ def run(self): current_directory = os.path.dirname(mlmd_file_name) if not os.path.exists(mlmd_file_name): #checking if MLMD files exists raise FileNotFound(mlmd_file_name, current_directory) - if not self.args.artifact_name[0]: # checking if user has not given -a as "" - raise ArtifactNotFound("") - if not self.args.pipeline_name[0]: # checking if user has not given -p as "" + + if not self.args.artifact_name: # If self.args.artifact_name is None or an empty list ([]). + pass + elif len(self.args.artifact_name) > 1: # If the user provided more than one artifact_name. + raise DuplicateArgumentNotAllowed("artifact_name", "-a") + elif not self.args.artifact_name[0]: # self.args.artifact_name[0] is an empty string (""). + raise MissingArgument("artifact name") + + if not self.args.pipeline_name: # checking if user has not given -p as "" raise PipelineNotFound(self.args.pipeline_name) + elif not self.args.pipeline_name[0]: + raise MissingArgument("pipeline name") + + query = cmfquery.CmfQuery(mlmd_file_name) if not query.get_pipeline_id(self.args.pipeline_name) > 0: #checking if pipeline name exists in mlmd raise PipelineNotFound(self.args.pipeline_name) - # getting all pipeline stages[i.e Prepare, Featurize, Train and Evaluate] stages = query.get_pipeline_stages(self.args.pipeline_name) executions = [] @@ -218,7 +229,6 @@ def run(self): ) # getting all artifacts with id temp_dict = dict(zip(get_artifacts['name'], get_artifacts['url'])) # getting dictionary of name and url pair name_url_dict.update(temp_dict) # updating name_url_dict with temp_dict - #print(name_url_dict) # name_url_dict = ('artifacts/parsed/test.tsv:6f597d341ceb7d8fbbe88859a892ef81', 'Test-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81' # name_url_dict = ('artifacts/parsed/test.tsv:6f597d341ceb7d8fbbe88859a892ef81', 'Test-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81,Second-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81') @@ -236,7 +246,6 @@ def run(self): download all files from directory """ - dvc_config_op = output if dvc_config_op["core.remote"] == "minio": minio_class_obj = minio_artifacts.MinioArtifacts(dvc_config_op) From fabf410d1a071f8c8ad2b91173ba8a005f1bff07 Mon Sep 17 00:00:00 2001 From: Varkha Sharma Date: Fri, 20 Dec 2024 04:44:00 -0800 Subject: [PATCH 23/41] addressed review comments --- cmflib/commands/artifact/pull.py | 8 ++++---- cmflib/storage_backends/local_artifacts.py | 11 ++++++----- cmflib/storage_backends/minio_artifacts.py | 21 +++++++++------------ server/app/main.py | 1 - 4 files changed, 19 insertions(+), 22 deletions(-) diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index f91fd26b..7e79648f 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -320,7 +320,7 @@ def run(self): print(f"object {object_name} downloaded at {download_loc}.") files_downloaded += 1 else: - print(f"object {object_name} is not downloaded ") + print(f"object {object_name} is not downloaded.") files_failed_to_download += 1 else: # If object name ends with `.dir`, download multiple files from a directory. @@ -414,7 +414,7 @@ def run(self): print(f"object {object_name} downloaded at {download_loc}.") files_downloaded += 1 else: - print(f"object {object_name} is not downloaded ") + print(f"object {object_name} is not downloaded.") files_failed_to_download += 1 else: # If object name ends with `.dir`, download multiple files from a directory. @@ -501,7 +501,7 @@ def run(self): print(f"object {object_name} downloaded at {download_loc}.") files_downloaded += 1 else: - print(f"object {object_name} is not downloaded ") + print(f"object {object_name} is not downloaded.") files_failed_to_download += 1 else: # If object name ends with `.dir`, download multiple files from a directory. @@ -654,7 +654,7 @@ def run(self): print(f"object {object_name} downloaded at {download_loc}.") files_downloaded += 1 else: - print(f"object {object_name} is not downloaded ") + print(f"object {object_name} is not downloaded.") files_failed_to_download += 1 else: total_files_in_directory, dir_files_downloaded, download_flag = amazonS3_class_obj.download_directory( diff --git a/cmflib/storage_backends/local_artifacts.py b/cmflib/storage_backends/local_artifacts.py index 0fc2c831..e3725a94 100644 --- a/cmflib/storage_backends/local_artifacts.py +++ b/cmflib/storage_backends/local_artifacts.py @@ -70,7 +70,7 @@ def download_file( else: return object_name, download_loc, False except Exception as e: - return object_name, download_loc, False + return object_name, download_loc, False def download_directory( @@ -156,13 +156,14 @@ def download_directory( if (total_files_in_directory - files_downloaded) == 0: return total_files_in_directory, files_downloaded, True else: - return total_files_in_directory, files_downloaded, False + return total_files_in_directory, files_downloaded, False # this exception is for get_file() function for object_name except Exception as e: print(f"object {object_name} is not downloaded.") - # need to improve this # We usually don't count .dir as a file while counting total_files_in_directory. - # However, here we failed to download the .dir folder itself. So we need to make - # total_files_in_directory = 1, because .............. + # However, here we failed to download the .dir folder itself. + # So we need to make, total_files_in_directory = 1 total_files_in_directory = 1 return total_files_in_directory, files_downloaded, False + + # sometimes we get TypeError as an execption, however investiagtion for the exact scenarios is pending diff --git a/cmflib/storage_backends/minio_artifacts.py b/cmflib/storage_backends/minio_artifacts.py index edbc3f16..5531d4fd 100644 --- a/cmflib/storage_backends/minio_artifacts.py +++ b/cmflib/storage_backends/minio_artifacts.py @@ -71,6 +71,9 @@ def download_file( return object_name, download_loc, True else: return object_name, download_loc, False + except S3Error as exception: + print(exception) + return object_name, download_loc, False except Exception as e: return object_name, download_loc, False @@ -161,7 +164,11 @@ def download_directory( if (total_files_in_directory - files_downloaded) == 0: return total_files_in_directory, files_downloaded, True else: - return total_files_in_directory, files_downloaded, False + return total_files_in_directory, files_downloaded, False + except S3Error as exception: + print(exception) + total_files_in_directory = 1 + return total_files_in_directory, files_downloaded, False except Exception as e: print(f"object {object_name} is not downloaded.") # need to improve this @@ -171,14 +178,4 @@ def download_directory( total_files_in_directory = 1 return total_files_in_directory, files_downloaded, False - - - - - - - # except TypeError as exception: - # #print("inside ") - # return exception - # except S3Error as exception: - # return exception + # sometimes we get TypeError as an execption, however investiagtion for the exact scenarios is pending diff --git a/server/app/main.py b/server/app/main.py index 2b4fbae9..9fe72a03 100644 --- a/server/app/main.py +++ b/server/app/main.py @@ -120,7 +120,6 @@ async def mlmd_push(info: Request): @app.get("/mlmd_pull/{pipeline_name}", response_class=HTMLResponse) async def mlmd_pull(info: Request, pipeline_name: str): # checks if mlmd file exists on server - print("inside api") req_info = await info.json() if os.path.exists(server_store_path): #json_payload values can be json data, NULL or no_exec_id. From e2ec3fd481ac8f315a16d1a53ecee2adc3bf7563 Mon Sep 17 00:00:00 2001 From: Varkha Sharma Date: Fri, 20 Dec 2024 04:47:21 -0800 Subject: [PATCH 24/41] addressed review comments --- cmflib/cmf_exception_handling.py | 147 +++++++++++++++++++++---------- 1 file changed, 100 insertions(+), 47 deletions(-) diff --git a/cmflib/cmf_exception_handling.py b/cmflib/cmf_exception_handling.py index 14958378..5cc80678 100644 --- a/cmflib/cmf_exception_handling.py +++ b/cmflib/cmf_exception_handling.py @@ -17,42 +17,48 @@ #!/usr/bin/env python3 from typing import Optional, List + class CmfResponse(Exception): """ - Response and Exceptions raised by the CMF. - CmfResponse includes two child classes, - 1. CmfSuccess - 2. CmfFailure - On the basis of success and failure various child classes are created. + Response and Exceptions raised by the CMF. + CmfResponse includes two child classes, + 1. CmfSuccess + 2. CmfFailure + On the basis of success and failure various child classes are created. - Base class for all the cmf responses and exceptions. + Base class for all the cmf responses and exceptions. """ - def __init__(self, return_code=None, status="failure", *args): + def __init__(self, return_code=None, status="failure", *args): self.return_code = return_code self.status = status super().__init__(*args) + class CmfFailure(CmfResponse): def __init__(self, return_code=None, *args): super().__init__(return_code, status="failure", *args) + # Subclass for Success Cases class CmfSuccess(CmfResponse): def __init__(self, return_code=None, *args): super().__init__(return_code, status="success", *args) -'''CMF Success Class''' + +"""CMF Success Class""" + class ExecutionsAlreadyExists(CmfSuccess): def __init__(self, return_code=201): super().__init__(return_code) - + def handle(self): return "INFO: Executions already exists." + class ObjectDownloadSuccess(CmfSuccess): - def __init__(self,object_name,download_loc, return_code=202): + def __init__(self, object_name, download_loc, return_code=202): self.object_name = object_name self.download_loc = download_loc super().__init__(return_code) @@ -60,39 +66,46 @@ def __init__(self,object_name,download_loc, return_code=202): def handle(self): return f"SUCCESS: Object {self.object_name} downloaded at {self.download_loc}." + class BatchDownloadSuccess(CmfSuccess): - def __init__(self,files_downloaded, return_code=203): + def __init__(self, files_downloaded, return_code=203): self.files_downloaded = files_downloaded super().__init__(return_code) def handle(self): return f"SUCCESS: Number of files downloaded = {self.files_downloaded }." + class MlmdFilePullSuccess(CmfSuccess): - def __init__(self,full_path_to_dump, return_code=204): + def __init__(self, full_path_to_dump, return_code=204): self.full_path_to_dump = full_path_to_dump super().__init__(return_code) def handle(self): return f"SUCCESS: {self.full_path_to_dump} is successfully pulled." - + + class MlmdFilePushSuccess(CmfSuccess): def __init__(self, file_name, return_code=205): self.file_name = file_name super().__init__(return_code) - + def handle(self): return f"SUCCESS: {self.file_name} is successfully pushed." + class TensorboardPushSuccess(CmfSuccess): - def __init__(self, tensorboard_file_name:str = "All", return_code=206): + def __init__(self, tensorboard_file_name: str = "All", return_code=206): self.tensorboard_file_name = tensorboard_file_name super().__init__(return_code) def handle(self): if self.tensorboard_file_name == "All": - return f"SUCCESS: All tensorboard logs pushed successfully." - return f"tensorboard logs: file {self.tensorboard_file_name} pushed successfully." + return f"SUCCESS: All tensorboard logs pushed successfully." + return ( + f"tensorboard logs: file {self.tensorboard_file_name} pushed successfully." + ) + class CmfInitComplete(CmfSuccess): def __init__(self, return_code=207): @@ -101,8 +114,9 @@ def __init__(self, return_code=207): def handle(self): return "SUCCESS: cmf init complete." + class CmfInitShow(CmfSuccess): - def __init__(self,result, attr_str, return_code=208): + def __init__(self, result, attr_str, return_code=208): self.result = result self.attr_str = attr_str super().__init__(return_code) @@ -110,6 +124,7 @@ def __init__(self,result, attr_str, return_code=208): def handle(self): return f"{self.result}\n{self.attr_str}" + class ArtifactPushSuccess(CmfSuccess): def __init__(self, message, return_code=209): self.message = message @@ -118,17 +133,24 @@ def __init__(self, message, return_code=209): def handle(self): return self.message + class MetadataExportToJson(CmfSuccess): - def __init__(self,full_path_to_dump, return_code=210): + def __init__(self, full_path_to_dump, return_code=210): self.full_path_to_dump = full_path_to_dump super().__init__(return_code) def handle(self): return f"SUCCESS: metadata successfully exported in {self.full_path_to_dump}." + # This class is created for messages like "Done", "Records not found" class MsgSuccess(CmfSuccess): - def __init__(self,msg_str: Optional[str] = None, msg_list: Optional[List[str]] = None, return_code=211): + def __init__( + self, + msg_str: Optional[str] = None, + msg_list: Optional[List[str]] = None, + return_code=211, + ): self.msg_str = msg_str self.msg_list = msg_list super().__init__(return_code) @@ -139,67 +161,75 @@ def handle(self): else: return self.msg_str -''' CMF FAILURE CLASSES''' + +""" CMF FAILURE CLASSES""" + class PipelineNotFound(CmfFailure): - def __init__(self,pipeline_name,return_code=101): - self.pipeline_name = pipeline_name + def __init__(self, pipeline_name, return_code=101): + self.pipeline_name = pipeline_name super().__init__(return_code) def handle(self): return f"ERROR: Pipeline name {self.pipeline_name} doesn't exist." - + class FileNotFound(CmfFailure): def __init__(self, file_name, directory, return_code=102): self.directory = directory - self.file_name =file_name + self.file_name = file_name super().__init__(return_code) - + def handle(self): return f"ERROR: File {self.file_name} doesn't exists in {self.directory} directory." + class BucketNotFound(CmfFailure): - def __init__(self,bucket_name, return_code=103): + def __init__(self, bucket_name, return_code=103): self.bucket_name = bucket_name super().__init__(return_code) - + def handle(self): return f"ERROR: Bucket {self.bucket_name} doesn't exist." + class ExecutionsNotFound(CmfFailure): def __init__(self, return_code=104): super().__init__(return_code) - + def handle(self): return f"ERROR: Executions not found." - + + class ExecutionIDNotFound(CmfFailure): - def __init__(self,exec_id, return_code=105): + def __init__(self, exec_id, return_code=105): self.exec_id = exec_id super().__init__(return_code) - + def handle(self): return f"ERROR: Execution id {self.exec_id} is not present in mlmd." - + + class ArtifactNotFound(CmfFailure): - def __init__(self,artifact_name, return_code=106): + def __init__(self, artifact_name, return_code=106): self.artifact_name = artifact_name super().__init__(return_code) - + def handle(self): return f"ERROR: Artifact {self.artifact_name} not found." - + + class ObjectDownloadFailure(CmfFailure): - def __init__(self,object_name, return_code=107): + def __init__(self, object_name, return_code=107): self.object_name = object_name super().__init__(return_code) def handle(self): return f"Object {self.object_name} is not downloaded." - + + class BatchDownloadFailure(CmfFailure): - def __init__(self,files_downloaded, Files_failed_to_download, return_code=108): + def __init__(self, files_downloaded, Files_failed_to_download, return_code=108): self.files_downloaded = files_downloaded self.Files_failed_to_download = Files_failed_to_download super().__init__(return_code) @@ -207,21 +237,24 @@ def __init__(self,files_downloaded, Files_failed_to_download, return_code=108): def handle(self): return f"INFO: Number of files downloaded = {self.files_downloaded }. Files failed to download = {self.Files_failed_to_download}." + class Minios3ServerInactive(CmfFailure): - def __init__(self,return_code=109): + def __init__(self, return_code=109): super().__init__(return_code) def handle(self): return f"ERROR: MinioS3 server is not running!!!" + class CmfNotConfigured(CmfFailure): - def __init__(self,message, return_code=110): + def __init__(self, message, return_code=110): self.message = message super().__init__(return_code) def handle(self): return self.message + class MlmdNotFoundOnServer(CmfFailure): def __init__(self, return_code=111): super().__init__(return_code) @@ -229,6 +262,7 @@ def __init__(self, return_code=111): def handle(self): return "ERROR: Metadata file not available on cmf-server." + class UpdateCmfVersion(CmfFailure): def __init__(self, return_code=112): super().__init__(return_code) @@ -236,8 +270,9 @@ def __init__(self, return_code=112): def handle(self): return "ERROR: You need to update cmf to the latest version. Unable to push metadata file." + class TensorboardPushFailure(CmfFailure): - def __init__(self,tensorboard_file_name,response_text, return_code=113): + def __init__(self, tensorboard_file_name, response_text, return_code=113): self.tensorboard_file_name = tensorboard_file_name self.response_text = response_text super().__init__(return_code) @@ -245,6 +280,7 @@ def __init__(self,tensorboard_file_name,response_text, return_code=113): def handle(self): return f"ERROR: Failed to upload file {self.tensorboard_file_name}. Server response: {self.response_text}." + class Neo4jArgumentNotProvided(CmfFailure): def __init__(self, return_code=114): super().__init__(return_code) @@ -252,6 +288,7 @@ def __init__(self, return_code=114): def handle(self): return "ERROR: Provide user, password and uri for neo4j initialization." + class CmfInitFailed(CmfFailure): def __init__(self, return_code=115): super().__init__(return_code) @@ -259,6 +296,7 @@ def __init__(self, return_code=115): def handle(self): return "ERROR: cmf init failed." + class CmfServerNotAvailable(CmfFailure): def __init__(self, return_code=116): super().__init__(return_code) @@ -266,6 +304,7 @@ def __init__(self, return_code=116): def handle(self): return "ERROR: cmf-server is not available." + class InternalServerError(CmfFailure): def __init__(self, return_code=117): super().__init__(return_code) @@ -273,6 +312,7 @@ def __init__(self, return_code=117): def handle(self): return "cmf-server error: The server encountered an unexpected error." + class MlmdFilePullFailure(CmfFailure): def __init__(self, return_code=118): super().__init__(return_code) @@ -280,14 +320,16 @@ def __init__(self, return_code=118): def handle(self): return "ERROR: Unable to pull metadata file." + class DirectoryNotfound(CmfFailure): - def __init__(self,dir, return_code=119): + def __init__(self, dir, return_code=119): self.dir = dir super().__init__(return_code) def handle(self): return f"ERROR: {self.dir} doesn't exists." + class FileNameNotfound(CmfFailure): def __init__(self, return_code=120): super().__init__(return_code) @@ -295,6 +337,7 @@ def __init__(self, return_code=120): def handle(self): return "ERROR: Provide path with file name." + class NoDataFoundOsdf(CmfFailure): def __init__(self, return_code=121): super().__init__(return_code) @@ -302,6 +345,7 @@ def __init__(self, return_code=121): def handle(self): return "ERROR: No data received from the server." + class InvalidTensorboardFilePath(CmfFailure): def __init__(self, return_code=122): super().__init__(return_code) @@ -309,8 +353,9 @@ def __init__(self, return_code=122): def handle(self): return "ERROR: Invalid tensorboard logs path. Provide valid file/folder path for tensorboard logs!!" + class DuplicateArgumentNotAllowed(CmfFailure): - def __init__(self,argument_name, argument_flag, return_code=123): + def __init__(self, argument_name, argument_flag, return_code=123): self.argument_flag = argument_flag self.argument_name = argument_name super().__init__(return_code) @@ -318,23 +363,31 @@ def __init__(self,argument_name, argument_flag, return_code=123): def handle(self): return f"Error: You can only provide one {self.argument_name} using the {self.argument_flag} flag." + class MissingArgument(CmfFailure): - def __init__(self,argument_name,return_code=124): + def __init__(self, argument_name, return_code=124): self.argument_name = argument_name super().__init__(return_code) def handle(self): return f"Error: Missing {self.argument_name}" + class NoChangesMadeInfo(CmfFailure): - def __init__(self,return_code=125): + def __init__(self, return_code=125): super().__init__(return_code) def handle(self): return "INFO: No changes made to the file. Operation aborted." + class MsgFailure(CmfFailure): - def __init__(self,msg_str: Optional[str] = None, msg_list: Optional[List[str]] = None, return_code=126): + def __init__( + self, + msg_str: Optional[str] = None, + msg_list: Optional[List[str]] = None, + return_code=126, + ): self.msg_str = msg_str self.msg_list = msg_list super().__init__(return_code) From 4e0377fa89f646c22ba7fed3c72d9317abeb224f Mon Sep 17 00:00:00 2001 From: Varkha Sharma Date: Fri, 20 Dec 2024 05:08:11 -0800 Subject: [PATCH 25/41] addressed revieew comments --- cmflib/commands/metadata/pull.py | 13 ++++++++++++- cmflib/commands/metadata/push.py | 16 ++++++++++++++-- cmflib/storage_backends/sshremote_artifacts.py | 13 ++----------- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/cmflib/commands/metadata/pull.py b/cmflib/commands/metadata/pull.py index 24209519..2ad40736 100644 --- a/cmflib/commands/metadata/pull.py +++ b/cmflib/commands/metadata/pull.py @@ -22,7 +22,18 @@ from cmflib.cli.utils import find_root from cmflib.server_interface import server_interface from cmflib.utils.cmf_config import CmfConfig -from cmflib.cmf_exception_handling import PipelineNotFound, CmfNotConfigured, ExecutionIDNotFound, MlmdNotFoundOnServer, MlmdFilePullSuccess, CmfServerNotAvailable, InternalServerError, MlmdFilePullFailure, DirectoryNotfound, FileNameNotfound +from cmflib.cmf_exception_handling import ( + PipelineNotFound, + CmfNotConfigured, ExecutionIDNotFound, + MlmdNotFoundOnServer, + MlmdFilePullSuccess, + CmfServerNotAvailable, + InternalServerError, + MlmdFilePullFailure, + DirectoryNotfound, + FileNameNotfound +) + # This class pulls mlmd file from cmf-server class CmdMetadataPull(CmdBase): def run(self): diff --git a/cmflib/commands/metadata/push.py b/cmflib/commands/metadata/push.py index 0ece69b4..f78088bd 100644 --- a/cmflib/commands/metadata/push.py +++ b/cmflib/commands/metadata/push.py @@ -23,8 +23,20 @@ from cmflib.cli.utils import find_root from cmflib.server_interface import server_interface from cmflib.utils.cmf_config import CmfConfig -from cmflib.cmf_exception_handling import TensorboardPushSuccess, TensorboardPushFailure, MlmdFilePushSuccess, ExecutionsAlreadyExists -from cmflib.cmf_exception_handling import FileNotFound, ExecutionIDNotFound, PipelineNotFound, ExecutionsAlreadyExists, UpdateCmfVersion, CmfServerNotAvailable, InternalServerError, CmfNotConfigured, InvalidTensorboardFilePath +from cmflib.cmf_exception_handling import ( + TensorboardPushSuccess, + TensorboardPushFailure, + MlmdFilePushSuccess, + ExecutionsAlreadyExists, + FileNotFound, + ExecutionIDNotFound, + PipelineNotFound, + UpdateCmfVersion, + CmfServerNotAvailable, + InternalServerError, + CmfNotConfigured, + InvalidTensorboardFilePath +) # This class pushes mlmd file to cmf-server class CmdMetadataPush(CmdBase): def run(self): diff --git a/cmflib/storage_backends/sshremote_artifacts.py b/cmflib/storage_backends/sshremote_artifacts.py index 01d81454..c500aa36 100644 --- a/cmflib/storage_backends/sshremote_artifacts.py +++ b/cmflib/storage_backends/sshremote_artifacts.py @@ -159,17 +159,8 @@ def download_directory( sftp.close() ssh.close() print(f"object {object_name} is not downloaded.") - # need to improve this # We usually don't count .dir as a file while counting total_files_in_directory. - # However, here we failed to download the .dir folder itself. So we need to make - # total_files_in_directory = 1, because .............. + # However, here we failed to download the .dir folder itself. + # So we need to make, total_files_in_directory = 1 total_files_in_directory = 1 return total_files_in_directory, files_downloaded, False - - - - - # except TypeError as exception: - # return exception - # except Exception as exception: - # return exception From 968d3dd45254d0b0f14f2836a6b46321cc6fbb1c Mon Sep 17 00:00:00 2001 From: abhinav chobey Date: Fri, 20 Dec 2024 06:57:25 -0800 Subject: [PATCH 26/41] updated except block in amazons3 --- cmflib/commands/artifact/pull.py | 15 ------ cmflib/commands/artifact/push.py | 4 +- cmflib/storage_backends/amazonS3_artifacts.py | 46 +++++++++++-------- 3 files changed, 28 insertions(+), 37 deletions(-) diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index 7e79648f..f4e228e4 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -186,20 +186,6 @@ def run(self): current_directory = os.path.dirname(mlmd_file_name) if not os.path.exists(mlmd_file_name): #checking if MLMD files exists raise FileNotFound(mlmd_file_name, current_directory) - - if not self.args.artifact_name: # If self.args.artifact_name is None or an empty list ([]). - pass - elif len(self.args.artifact_name) > 1: # If the user provided more than one artifact_name. - raise DuplicateArgumentNotAllowed("artifact_name", "-a") - elif not self.args.artifact_name[0]: # self.args.artifact_name[0] is an empty string (""). - raise MissingArgument("artifact name") - - if not self.args.pipeline_name: # checking if user has not given -p as "" - raise PipelineNotFound(self.args.pipeline_name) - elif not self.args.pipeline_name[0]: - raise MissingArgument("pipeline name") - - query = cmfquery.CmfQuery(mlmd_file_name) if not query.get_pipeline_id(self.args.pipeline_name) > 0: #checking if pipeline name exists in mlmd raise PipelineNotFound(self.args.pipeline_name) @@ -235,7 +221,6 @@ def run(self): output = DvcConfig.get_dvc_config() # pulling dvc config if type(output) is not dict: return output - """ There are multiple scenarios for cmf artifact pull Code checks if self.args.artifact_name is provided by user or not diff --git a/cmflib/commands/artifact/push.py b/cmflib/commands/artifact/push.py index 1cfa1a82..9360bd77 100644 --- a/cmflib/commands/artifact/push.py +++ b/cmflib/commands/artifact/push.py @@ -28,7 +28,7 @@ from cmflib.dvc_wrapper import dvc_add_attribute from cmflib.cli.utils import find_root from cmflib.utils.cmf_config import CmfConfig -from cmflib.cmf_exception_handling import PipelineNotFound, Minios3ServerInactive, FileNotFound, ExecutionsNotFound, CmfNotConfigured, ArtifactPushSuccess +from cmflib.cmf_exception_handling import PipelineNotFound, Minios3ServerInactive, FileNotFound, ExecutionsNotFound, CmfNotConfigured, ArtifactPushSuccess, MissingArgument, DuplicateArgumentNotAllowed class CmdArtifactPush(CmdBase): def run(self): @@ -72,7 +72,7 @@ def run(self): raise FileNotFound(mlmd_file_name, current_directory) # creating cmfquery object query = cmfquery.CmfQuery(mlmd_file_name) - + # Put a check to see whether pipline exists or not pipeline_name = self.args.pipeline_name if not query.get_pipeline_id(pipeline_name) > 0: diff --git a/cmflib/storage_backends/amazonS3_artifacts.py b/cmflib/storage_backends/amazonS3_artifacts.py index b7abd0cc..de9cafcb 100644 --- a/cmflib/storage_backends/amazonS3_artifacts.py +++ b/cmflib/storage_backends/amazonS3_artifacts.py @@ -78,6 +78,14 @@ def download_file( return object_name, download_loc, True else: return object_name, download_loc, False + except self.s3.exceptions.ClientError as e: + # If a specific error code is returned, the bucket does not exist + if e.response['Error']['Code'] == '404': + print(f"{bucket_name} doesn't exists!!") + return object_name, download_loc, False + else: + print(e) + return object_name, download_loc, False except Exception as e: return object_name, download_loc, False @@ -147,14 +155,12 @@ def download_directory(self, formatted_md5 = md5_val[:2] + '/' + md5_val[2:] temp_download_loc = f"{download_loc}/{relpath}" temp_object_name = f"{repo_path}/{formatted_md5}" - try: - obj = self.s3.download_file(bucket_name, temp_object_name, temp_download_loc) - if obj == None: - files_downloaded += 1 - print(f"object {temp_object_name} downloaded at {temp_download_loc}.") - else: - print(f"object {temp_object_name} is not downloaded.") - except Exception as e: + + obj = self.s3.download_file(bucket_name, temp_object_name, temp_download_loc) + if obj == None: + files_downloaded += 1 + print(f"object {temp_object_name} downloaded at {temp_download_loc}.") + else: print(f"object {temp_object_name} is not downloaded.") # Check if all files were successfully downloaded. @@ -162,6 +168,16 @@ def download_directory(self, return total_files_in_directory, files_downloaded, True else: return total_files_in_directory, files_downloaded, False + except self.s3.exceptions.ClientError as e: + # If a specific error code is returned, the bucket does not exist + if e.response['Error']['Code'] == '404': + print(f"{bucket_name} doesn't exists!!") + total_files_in_directory = 1 + return total_files_in_directory, files_downloaded, False + else: + print(e) + total_files_in_directory = 1 + return total_files_in_directory, files_downloaded, False except Exception as e: print(f"object {object_name} is not downloaded.") # Handle failure to download the .dir metadata. @@ -172,15 +188,5 @@ def download_directory(self, total_files_in_directory = 1 return total_files_in_directory, files_downloaded, False - # this will never be called - except self.s3.exceptions.ClientError as e: - # If a specific error code is returned, the bucket does not exist - if e.response['Error']['Code'] == '404': - return f"{bucket_name} doesn't exists!!" - else: - # Handle other errors - raise - except TypeError as exception: - return exception - except Exception as e: - return e \ No newline at end of file + + \ No newline at end of file From cbc1d225ada5dd3803a4323621381d599e7ee5cd Mon Sep 17 00:00:00 2001 From: abhinavchobey <111754147+abhinavchobey@users.noreply.github.com> Date: Tue, 24 Dec 2024 12:24:20 +0530 Subject: [PATCH 27/41] Update pull.py --- cmflib/commands/artifact/pull.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index f4e228e4..f1e85918 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -43,6 +43,7 @@ MsgFailure ) from cmflib.cli.utils import check_minio_server +from cmflib.cmf_exception_handling import CmfNotConfigured class CmdArtifactPull(CmdBase): @@ -220,7 +221,7 @@ def run(self): # name_url_dict = ('artifacts/parsed/test.tsv:6f597d341ceb7d8fbbe88859a892ef81', 'Test-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81,Second-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81') output = DvcConfig.get_dvc_config() # pulling dvc config if type(output) is not dict: - return output + raise CmfNotConfigured(output) """ There are multiple scenarios for cmf artifact pull Code checks if self.args.artifact_name is provided by user or not From 282b14a355ac07ca1e8c1a04853324e8219f2107 Mon Sep 17 00:00:00 2001 From: AyeshaSanadi Date: Thu, 26 Dec 2024 17:31:06 +0530 Subject: [PATCH 28/41] Added validation --- cmflib/commands/artifact/pull.py | 7 ++++ cmflib/commands/artifact/push.py | 32 +++++++++++----- cmflib/commands/metadata/push.py | 13 ++++--- cmflib/commands/repo/pull.py | 32 ++++++++-------- cmflib/commands/repo/push.py | 64 +++++++++++++++++--------------- 5 files changed, 89 insertions(+), 59 deletions(-) diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index c6baffcd..b7e7396b 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -187,6 +187,7 @@ def run(self): mlmd_file_name = self.args.file_name if mlmd_file_name == "mlmd": mlmd_file_name = "./mlmd" + print("mlmd_file_name",mlmd_file_name) current_directory = os.path.dirname(mlmd_file_name) if not os.path.exists(mlmd_file_name): #checking if MLMD files exists raise FileNotFound(mlmd_file_name, current_directory) @@ -197,9 +198,11 @@ def run(self): stages = query.get_pipeline_stages(self.args.pipeline_name) executions = [] identifiers = [] + print("stages:",stages) for stage in stages: # getting all executions for stages executions = query.get_all_executions_in_stage(stage) + print("executions:",executions) # check if stage has executions if len(executions) > 0: # converting it to dictionary @@ -213,16 +216,20 @@ def run(self): name_url_dict = {} if len(identifiers) == 0: # check if there are no executions raise ExecutionsNotFound() + print("identifiers: ", identifiers) for identifier in identifiers: get_artifacts = query.get_all_artifacts_for_execution( identifier ) # getting all artifacts with id + print("get_artifacts: ",get_artifacts) temp_dict = dict(zip(get_artifacts['name'], get_artifacts['url'])) # getting dictionary of name and url pair name_url_dict.update(temp_dict) # updating name_url_dict with temp_dict #print(name_url_dict) # name_url_dict = ('artifacts/parsed/test.tsv:6f597d341ceb7d8fbbe88859a892ef81', 'Test-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81' # name_url_dict = ('artifacts/parsed/test.tsv:6f597d341ceb7d8fbbe88859a892ef81', 'Test-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81,Second-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81') + # print('i am here') output = DvcConfig.get_dvc_config() # pulling dvc config + if type(output) is not dict: raise CmfNotConfigured(output) """ diff --git a/cmflib/commands/artifact/push.py b/cmflib/commands/artifact/push.py index 5a7fe7d8..1f2813e5 100644 --- a/cmflib/commands/artifact/push.py +++ b/cmflib/commands/artifact/push.py @@ -31,9 +31,6 @@ from cmflib.cmf_exception_handling import PipelineNotFound, Minios3ServerInactive, FileNotFound, ExecutionsNotFound, CmfNotConfigured, ArtifactPushSuccess, MissingArgument, DuplicateArgumentNotAllowed class CmdArtifactPush(CmdBase): - def __init__(self, args): - self.args = args - def run(self): result = "" dvc_config_op = DvcConfig.get_dvc_config() @@ -66,18 +63,30 @@ def run(self): # Default path of mlmd file mlmd_file_name = "./mlmd" current_directory = os.getcwd() - if self.args.file_name: - mlmd_file_name = self.args.file_name + if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). + mlmd_file_name = "./mlmd" # Default path for mlmd file name. + elif len(self.args.file_name) > 1: # If the user provided more than one file name. + raise DuplicateArgumentNotAllowed("file_name", "-f") + elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). + raise MissingArgument("file name") + else: + mlmd_file_name = self.args.file_name[0].strip() if mlmd_file_name == "mlmd": mlmd_file_name = "./mlmd" - current_directory = os.path.dirname(mlmd_file_name) + current_directory = os.path.dirname(mlmd_file_name) if not os.path.exists(mlmd_file_name): raise FileNotFound(mlmd_file_name, current_directory) # creating cmfquery object query = cmfquery.CmfQuery(mlmd_file_name) # Put a check to see whether pipline exists or not - pipeline_name = self.args.pipeline_name + if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: + raise DuplicateArgumentNotAllowed("pipeline_name", "-p") + elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). + raise MissingArgument("pipeline name") + else: + pipeline_name = self.args.pipeline_name[0] + if not query.get_pipeline_id(pipeline_name) > 0: raise PipelineNotFound(pipeline_name) @@ -127,7 +136,7 @@ def run(self): #print("file_set = ", final_list) result = dvc_push(list(final_list)) return ArtifactPushSuccess(result) - + def add_parser(subparsers, parent_parser): HELP = "Push artifacts to the user configured artifact repo." @@ -145,12 +154,17 @@ def add_parser(subparsers, parent_parser): "-p", "--pipeline_name", required=True, + action="append", help="Specify Pipeline name.", metavar="", ) parser.add_argument( - "-f", "--file_name", help="Specify mlmd file name.", metavar="" + "-f", + "--file_name", + action="append", + help="Specify mlmd file name.", + metavar="" ) parser.set_defaults(func=CmdArtifactPush) diff --git a/cmflib/commands/metadata/push.py b/cmflib/commands/metadata/push.py index 10098714..5178a0b0 100644 --- a/cmflib/commands/metadata/push.py +++ b/cmflib/commands/metadata/push.py @@ -39,9 +39,6 @@ ) # This class pushes mlmd file to cmf-server class CmdMetadataPush(CmdBase): - def __init__(self, args): - self.args = args - def run(self): # Get url from config cmfconfig = os.environ.get("CONFIG_FILE",".cmfconfig") @@ -107,7 +104,7 @@ def run(self): output = MlmdFilePushSuccess(mlmd_file_name) if response.json()["status"]=="exists": display_output = "Executions already exists." - output = ExecutionsAlreadyExists + output = ExecutionsAlreadyExists() if not self.args.tensorboard: return output @@ -173,17 +170,23 @@ def add_parser(subparsers, parent_parser): "-p", "--pipeline_name", required=True, + action="append", help="Specify Pipeline name.", metavar="", ) parser.add_argument( - "-f", "--file_name", help="Specify mlmd file name.", metavar="" + "-f", + "--file_name", + action="append", + help="Specify mlmd file name.", + metavar="" ) parser.add_argument( "-e", "--execution", + action="append", help="Specify Execution id.", metavar="", ) diff --git a/cmflib/commands/repo/pull.py b/cmflib/commands/repo/pull.py index 52e9a995..f4fc310c 100644 --- a/cmflib/commands/repo/pull.py +++ b/cmflib/commands/repo/pull.py @@ -65,27 +65,27 @@ def git_pull(self): def run(self): # check whether dvc is configured or not - msg = "'cmf' is not configured.\nExecute 'cmf init' command." - result = dvc_get_config() - if len(result) == 0: - return msg + # msg = "'cmf' is not configured.\nExecute 'cmf init' command." + # result = dvc_get_config() + # if len(result) == 0: + # return msg - current_directory = os.getcwd() - mlmd_file_name = "./mlmd" - if self.args.file_name: - mlmd_file_name = self.args.file_name - if mlmd_file_name == "mlmd": - mlmd_file_name = "./mlmd" - current_directory = os.path.dirname(mlmd_file_name) + # current_directory = os.getcwd() + # mlmd_file_name = "./mlmd" + # if self.args.file_name: + # mlmd_file_name = self.args.file_name + # if mlmd_file_name == "mlmd": + # mlmd_file_name = "./mlmd" + # current_directory = os.path.dirname(mlmd_file_name) - if not os.path.exists(mlmd_file_name): - return f"ERROR: {mlmd_file_name} doesn't exists in {current_directory} directory." - else: + # if not os.path.exists(mlmd_file_name): + # return f"ERROR: {mlmd_file_name} doesn't exists in {current_directory} directory." + # else: instance_of_artifact = CmdArtifactPull(self.args) - if instance_of_artifact.run(): + if instance_of_artifact.run().status == "success": print("metadata pull started...") instance_of_metadata = CmdMetadataPull(self.args) - if instance_of_metadata.run(): + if instance_of_metadata.run().status == "success": return self.git_pull() diff --git a/cmflib/commands/repo/push.py b/cmflib/commands/repo/push.py index ca88d876..83b806e0 100644 --- a/cmflib/commands/repo/push.py +++ b/cmflib/commands/repo/push.py @@ -28,9 +28,6 @@ class CmdRepoPush(CmdBase): - def __init__(self, args): - self.args = args - def run_command(self, command, cwd=None): process = subprocess.Popen(command, cwd=cwd, shell=True, stdout=subprocess.PIPE, @@ -77,38 +74,40 @@ def git_push(self): def run(self): - # check whether dvc is configured or not - msg = "'cmf' is not configured.\nExecute 'cmf init' command." - result = dvc_get_config() - if len(result) == 0: - return msg + # # check whether dvc is configured or not + # msg = "'cmf' is not configured.\nExecute 'cmf init' command." + # result = dvc_get_config() + # if len(result) == 0: + # return msg - current_directory = os.getcwd() - mlmd_file_name = "./mlmd" - - # check whether mlmd file exists or not - if self.args.file_name: - mlmd_file_name = self.args.file_name - current_directory = os.path.dirname(self.args.file_name) - - # checks if mlmd file is present in current directory or given directory - if not os.path.exists(mlmd_file_name): - return f"ERROR: {mlmd_file_name} doesn't exists in the {current_directory}." - else: - # creating cmfquery object - query = cmfquery.CmfQuery(mlmd_file_name) - # Put a check to see whether pipline exists or not - pipeline_name = self.args.pipeline_name - if not query.get_pipeline_id(pipeline_name) > 0: - return f"ERROR: Pipeline {pipeline_name} doesn't exist!!" + # current_directory = os.getcwd() + # mlmd_file_name = "./mlmd" + + # # check whether mlmd file exists or not + # if self.args.file_name: + # mlmd_file_name = self.args.file_name + # current_directory = os.path.dirname(self.args.file_name) + + # # checks if mlmd file is present in current directory or given directory + # if not os.path.exists(mlmd_file_name): + # return f"ERROR: {mlmd_file_name} doesn't exists in the {current_directory}." + # else: + # # creating cmfquery object + # query = cmfquery.CmfQuery(mlmd_file_name) + # # Put a check to see whether pipline exists or not + # pipeline_name = self.args.pipeline_name + # if not query.get_pipeline_id(pipeline_name) > 0: + # return f"ERROR: Pipeline {pipeline_name} doesn't exist!!" print("Executing cmf artifact push command..") artifact_push_instance = CmdArtifactPush(self.args) - if artifact_push_instance.run(): + # print(artifact_push_instance.run().status) + if artifact_push_instance.run().status == "success": print("Executing cmf metadata push command..") metadata_push_instance = CmdMetadataPush(self.args) - if metadata_push_instance.run(): + if metadata_push_instance.run().status == "success": print("Execution git push command..") + print(self.git_push()) return self.git_push() @@ -129,17 +128,23 @@ def add_parser(subparsers, parent_parser): "-p", "--pipeline_name", required=True, + action="append", help="Specify Pipeline name.", metavar="", ) parser.add_argument( - "-f", "--file_name", help="Specify mlmd file name.", metavar="" + "-f", + "--file_name", + action="append", + help="Specify mlmd file name.", + metavar="" ) parser.add_argument( "-e", "--execution", + action="append", help="Specify Execution id.", default=None, metavar="", @@ -148,6 +153,7 @@ def add_parser(subparsers, parent_parser): parser.add_argument( "-t", "--tensorboard", + action="append", help="Specify path to tensorboard logs for the pipeline.", metavar="" ) From be0f58cc00fbcdab6cf3e60e2db0ce34133c5906 Mon Sep 17 00:00:00 2001 From: AyeshaSanadi Date: Thu, 26 Dec 2024 19:10:29 +0530 Subject: [PATCH 29/41] Added validation for artifact/metadata/repo push file --- cmflib/commands/artifact/push.py | 10 +++++-- cmflib/commands/metadata/push.py | 45 +++++++++++++++++++++----------- cmflib/commands/repo/push.py | 4 +-- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/cmflib/commands/artifact/push.py b/cmflib/commands/artifact/push.py index 1f2813e5..3ad27182 100644 --- a/cmflib/commands/artifact/push.py +++ b/cmflib/commands/artifact/push.py @@ -28,7 +28,14 @@ from cmflib.dvc_wrapper import dvc_add_attribute from cmflib.cli.utils import find_root from cmflib.utils.cmf_config import CmfConfig -from cmflib.cmf_exception_handling import PipelineNotFound, Minios3ServerInactive, FileNotFound, ExecutionsNotFound, CmfNotConfigured, ArtifactPushSuccess, MissingArgument, DuplicateArgumentNotAllowed +from cmflib.cmf_exception_handling import ( + PipelineNotFound, Minios3ServerInactive, + FileNotFound, + ExecutionsNotFound, + CmfNotConfigured, + ArtifactPushSuccess, + MissingArgument, + DuplicateArgumentNotAllowed) class CmdArtifactPush(CmdBase): def run(self): @@ -61,7 +68,6 @@ def run(self): return result # Default path of mlmd file - mlmd_file_name = "./mlmd" current_directory = os.getcwd() if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. diff --git a/cmflib/commands/metadata/push.py b/cmflib/commands/metadata/push.py index 5178a0b0..3c21d335 100644 --- a/cmflib/commands/metadata/push.py +++ b/cmflib/commands/metadata/push.py @@ -35,7 +35,9 @@ CmfServerNotAvailable, InternalServerError, CmfNotConfigured, - InvalidTensorboardFilePath + InvalidTensorboardFilePath, + MissingArgument, + DuplicateArgumentNotAllowed ) # This class pushes mlmd file to cmf-server class CmdMetadataPush(CmdBase): @@ -54,13 +56,20 @@ def run(self): attr_dict = CmfConfig.read_config(config_file_path) url = attr_dict.get("cmf-server-ip", "http://127.0.0.1:80") - mlmd_file_name = "./mlmd" current_directory = os.getcwd() - # checks if mlmd filepath is given - if self.args.file_name: - mlmd_file_name = self.args.file_name - current_directory = os.path.dirname(self.args.file_name) - + if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). + mlmd_file_name = "./mlmd" # Default path for mlmd file name. + elif len(self.args.file_name) > 1: # If the user provided more than one file name. + raise DuplicateArgumentNotAllowed("file_name", "-f") + elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). + raise MissingArgument("file name") + else: + mlmd_file_name = self.args.file_name[0].strip() + if mlmd_file_name == "mlmd": + mlmd_file_name = "./mlmd" + + current_directory = os.path.dirname(mlmd_file_name) + # checks if mlmd file is present in current directory or given directory if not os.path.exists(mlmd_file_name): raise FileNotFound(mlmd_file_name, current_directory) @@ -71,11 +80,18 @@ def run(self): status_code = 0 # Checks if pipeline name exists - if self.args.pipeline_name in query.get_pipeline_names(): + if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: + raise DuplicateArgumentNotAllowed("pipeline_name", "-p") + elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). + raise MissingArgument("pipeline name") + else: + pipeline_name = self.args.pipeline_name[0] + if pipeline_name in query.get_pipeline_names(): print("metadata push started") print("........................................") # converts mlmd file to json format - json_payload = query.dumptojson(self.args.pipeline_name, None) + json_payload = query.dumptojson(pipeline_name, None) + # checks if execution_id is given by user if self.args.execution: exec_id = self.args.execution @@ -87,14 +103,14 @@ def run(self): execution_flag = 1 # calling mlmd_push api to push mlmd file to cmf-server response = server_interface.call_mlmd_push( - json_payload, url, exec_id, self.args.pipeline_name + json_payload, url, exec_id, pipeline_name ) break if execution_flag == 0: raise ExecutionIDNotFound(exec_id) else: exec_id = None - response = server_interface.call_mlmd_push(json_payload, url, exec_id, self.args.pipeline_name) + response = server_interface.call_mlmd_push(json_payload, url, exec_id, pipeline_name) status_code = response.status_code if status_code == 200: output = "" @@ -118,7 +134,7 @@ def run(self): # check if the path provided is for a file if os.path.isfile(self.args.tensorboard): file_name = os.path.basename(self.args.tensorboard) - tresponse = server_interface.call_tensorboard(url, self.args.pipeline_name, file_name, self.args.tensorboard) + tresponse = server_interface.call_tensorboard(url, pipeline_name, file_name, self.args.tensorboard) tstatus_code = tresponse.status_code if tstatus_code == 200: # give status code as success @@ -133,7 +149,7 @@ def run(self): for file_name in files: file_path = os.path.join(root, file_name) relative_path = os.path.relpath(file_path, self.args.tensorboard) - tresponse = server_interface.call_tensorboard(url, self.args.pipeline_name, relative_path, file_path) + tresponse = server_interface.call_tensorboard(url, pipeline_name, relative_path, file_path) if tresponse.status_code == 200: print(f"tensorboard logs: File {file_name} uploaded successfully.") else: @@ -151,7 +167,7 @@ def run(self): else: return "ERROR: Status Code = {status_code}. Unable to push mlmd." else: - raise PipelineNotFound(self.args.pipeline_name) + raise PipelineNotFound(pipeline_name) def add_parser(subparsers, parent_parser): @@ -186,7 +202,6 @@ def add_parser(subparsers, parent_parser): parser.add_argument( "-e", "--execution", - action="append", help="Specify Execution id.", metavar="", ) diff --git a/cmflib/commands/repo/push.py b/cmflib/commands/repo/push.py index 83b806e0..d80ca363 100644 --- a/cmflib/commands/repo/push.py +++ b/cmflib/commands/repo/push.py @@ -144,7 +144,6 @@ def add_parser(subparsers, parent_parser): parser.add_argument( "-e", "--execution", - action="append", help="Specify Execution id.", default=None, metavar="", @@ -152,8 +151,7 @@ def add_parser(subparsers, parent_parser): parser.add_argument( "-t", - "--tensorboard", - action="append", + "--tensorboard",\ help="Specify path to tensorboard logs for the pipeline.", metavar="" ) From fbf56f092465dc64223da9bb8d1ddf9e4958fa06 Mon Sep 17 00:00:00 2001 From: AyeshaSanadi Date: Mon, 30 Dec 2024 15:46:28 +0530 Subject: [PATCH 30/41] Added git push/pull command inside dvc_wrapper and apply validation for metadata push command. --- cmflib/commands/metadata/push.py | 171 +++++++++++++++++-------------- cmflib/commands/repo/push.py | 83 ++++----------- cmflib/dvc_wrapper.py | 28 +++++ 3 files changed, 140 insertions(+), 142 deletions(-) diff --git a/cmflib/commands/metadata/push.py b/cmflib/commands/metadata/push.py index 3c21d335..7d5cbc64 100644 --- a/cmflib/commands/metadata/push.py +++ b/cmflib/commands/metadata/push.py @@ -86,88 +86,101 @@ def run(self): raise MissingArgument("pipeline name") else: pipeline_name = self.args.pipeline_name[0] - if pipeline_name in query.get_pipeline_names(): - print("metadata push started") - print("........................................") - # converts mlmd file to json format - json_payload = query.dumptojson(pipeline_name, None) - - # checks if execution_id is given by user - if self.args.execution: - exec_id = self.args.execution - mlmd_data = json.loads(json_payload)["Pipeline"] - # checks if given execution_id present in mlmd - for i in mlmd_data[0]["stages"]: - for j in i["executions"]: - if j["id"] == int(exec_id): - execution_flag = 1 - # calling mlmd_push api to push mlmd file to cmf-server - response = server_interface.call_mlmd_push( - json_payload, url, exec_id, pipeline_name - ) - break - if execution_flag == 0: - raise ExecutionIDNotFound(exec_id) - else: - exec_id = None - response = server_interface.call_mlmd_push(json_payload, url, exec_id, pipeline_name) - status_code = response.status_code - if status_code == 200: - output = "" - display_output = "" - if response.json()['status']=="success": - display_output = "mlmd is successfully pushed." - output = MlmdFilePushSuccess(mlmd_file_name) - if response.json()["status"]=="exists": - display_output = "Executions already exists." - output = ExecutionsAlreadyExists() + if pipeline_name in query.get_pipeline_names(): + print("metadata push started") + print("........................................") + # converts mlmd file to json format + json_payload = query.dumptojson(pipeline_name, None) - if not self.args.tensorboard: - return output - print(display_output) - # /tensorboard api call is done only if mlmd push is successfully completed - # tensorboard parameter is passed - print("......................................") - print("tensorboard logs upload started!!") - print("......................................") - - # check if the path provided is for a file - if os.path.isfile(self.args.tensorboard): - file_name = os.path.basename(self.args.tensorboard) - tresponse = server_interface.call_tensorboard(url, pipeline_name, file_name, self.args.tensorboard) - tstatus_code = tresponse.status_code - if tstatus_code == 200: - # give status code as success - return TensorboardPushSuccess(file_name) + # checks if execution is given by user + if not self.args.execution: # If self.args.execution is None or an empty list ([]). + exec_id = None + response = server_interface.call_mlmd_push(json_payload, url, exec_id, pipeline_name) + elif len(self.args.execution) > 1: # If the user provided more than one execution. + raise DuplicateArgumentNotAllowed("execution", "-e") + elif not self.args.execution[0]: # self.args.execution[0] is an empty string (""). + raise MissingArgument("execution id") + elif not self.args.execution[0].isdigit(): + raise ExecutionIDNotFound(self.args.execution[0]) + else: + exec_id = int(self.args.execution[0]) + mlmd_data = json.loads(json_payload)["Pipeline"] + # checks if given execution present in mlmd + for i in mlmd_data[0]["stages"]: + for j in i["executions"]: + if j["id"] == int(exec_id): + execution_flag = 1 + # calling mlmd_push api to push mlmd file to cmf-server + response = server_interface.call_mlmd_push( + json_payload, url, exec_id, pipeline_name + ) + break + if execution_flag == 0: + raise ExecutionIDNotFound(exec_id) + status_code = response.status_code + print("status_code:",status_code) + print("response.json:",response.json()['status']) + if status_code == 200: + output = "" + display_output = "" + if response.json()['status']=="success": + display_output = "mlmd is successfully pushed." + output = MlmdFilePushSuccess(mlmd_file_name) + if response.json()["status"]=="exists": + display_output = "Executions already exists." + output = ExecutionsAlreadyExists() + + if not self.args.tensorboard: + return output + elif len(self.args.tensorboard) > 1: # If the user provided more than one tensorboard name. + raise DuplicateArgumentNotAllowed("tensorboard", "-t") + elif not self.args.tensorboard[0]: # self.args.tensorboard[0] is an empty string (""). + raise MissingArgument("tensorboard") + print(display_output) + # /tensorboard api call is done only if mlmd push is successfully completed + # tensorboard parameter is passed + print("......................................") + print("tensorboard logs upload started!!") + print("......................................") + + tensorboard = self.args.tensorboard[0] + # check if the path provided is for a file + if os.path.isfile(tensorboard): + file_name = os.path.basename(tensorboard) + tresponse = server_interface.call_tensorboard(url, pipeline_name, file_name, tensorboard) + tstatus_code = tresponse.status_code + if tstatus_code == 200: + # give status code as success + return TensorboardPushSuccess(file_name) + else: + # give status code as failure + return TensorboardPushFailure(file_name,tresponse.text) + # If path provided is a directory + elif os.path.isdir(tensorboard): + # Recursively push all files and subdirectories + for root, dirs, files in os.walk(tensorboard): + for file_name in files: + file_path = os.path.join(root, file_name) + relative_path = os.path.relpath(file_path, tensorboard) + tresponse = server_interface.call_tensorboard(url, pipeline_name, relative_path, file_path) + if tresponse.status_code == 200: + print(f"tensorboard logs: File {file_name} uploaded successfully.") + else: + # give status as failure + return TensorboardPushFailure(file_name,tresponse.text) + return TensorboardPushSuccess() else: - # give status code as failure - return TensorboardPushFailure(file_name,tresponse.text) - # If path provided is a directory - elif os.path.isdir(self.args.tensorboard): - # Recursively push all files and subdirectories - for root, dirs, files in os.walk(self.args.tensorboard): - for file_name in files: - file_path = os.path.join(root, file_name) - relative_path = os.path.relpath(file_path, self.args.tensorboard) - tresponse = server_interface.call_tensorboard(url, pipeline_name, relative_path, file_path) - if tresponse.status_code == 200: - print(f"tensorboard logs: File {file_name} uploaded successfully.") - else: - # give status as failure - return TensorboardPushFailure(file_name,tresponse.text) - return TensorboardPushSuccess() + return InvalidTensorboardFilePath() + elif status_code==422 and response.json()["status"]=="version_update": + raise UpdateCmfVersion + elif status_code == 404: + raise CmfServerNotAvailable + elif status_code == 500: + raise InternalServerError else: - return InvalidTensorboardFilePath() - elif status_code==422 and response.json()["status"]=="version_update": - raise UpdateCmfVersion - elif status_code == 404: - raise CmfServerNotAvailable - elif status_code == 500: - raise InternalServerError + return "ERROR: Status Code = {status_code}. Unable to push mlmd." else: - return "ERROR: Status Code = {status_code}. Unable to push mlmd." - else: - raise PipelineNotFound(pipeline_name) + raise PipelineNotFound(pipeline_name) def add_parser(subparsers, parent_parser): @@ -202,6 +215,7 @@ def add_parser(subparsers, parent_parser): parser.add_argument( "-e", "--execution", + action="append", help="Specify Execution id.", metavar="", ) @@ -209,6 +223,7 @@ def add_parser(subparsers, parent_parser): parser.add_argument( "-t", "--tensorboard", + action="append", help="Specify path to tensorboard logs for the pipeline.", metavar="" ) diff --git a/cmflib/commands/repo/push.py b/cmflib/commands/repo/push.py index d80ca363..3c9392f0 100644 --- a/cmflib/commands/repo/push.py +++ b/cmflib/commands/repo/push.py @@ -16,27 +16,15 @@ #!/usr/bin/env python3 import argparse -import os -import subprocess import requests from cmflib.cli.command import CmdBase -from cmflib import cmfquery -from cmflib.dvc_wrapper import dvc_get_config, git_get_repo, git_checkout_new_branch +from cmflib.dvc_wrapper import git_get_repo, git_get_pull, git_get_push from cmflib.commands.artifact.push import CmdArtifactPush from cmflib.commands.metadata.push import CmdMetadataPush class CmdRepoPush(CmdBase): - def run_command(self, command, cwd=None): - process = subprocess.Popen(command, cwd=cwd, shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - stdout, stderr = process.communicate() - return (stdout.decode('utf-8').strip() if stdout else '', - stderr.decode('utf-8').strip() if stderr else '', - process.returncode) - def branch_exists(self, repo_own, repo_name, branch_name): url = f"https://api.github.com/repos/{repo_own}/{repo_name}/branches/{branch_name}" res = requests.get(url) @@ -50,65 +38,30 @@ def git_push(self): url = url.split("/") # whether branch exists in git repo or not if self.branch_exists(url[-2], url[-1], "mlmd"): - # print("branch exists") # pull the code # push the code - stdout, stderr, returncode = self.run_command("git pull cmf_origin mlmd") + stdout, stderr, returncode = git_get_pull() # print(returncode+"1") if returncode != 0: return f"Error pulling changes: {stderr}" print(stdout) - - stdout, stderr, returncode = self.run_command("git push -u cmf_origin mlmd") - if returncode != 0: - return f"Error pushing changes: {stderr}" - print(stdout) - return "Successfully pushed and pulled changes!" - else: - print("branch not exists") - # push the code - stdout, stderr, returncode = self.run_command("git push -u cmf_origin mlmd") - if returncode != 0: - return f"Error pushing changes: {stderr}" - return "Successfully pushed and pulled changes!" + # push the code + stdout, stderr, returncode = git_get_push() + if returncode != 0: + return f"Error pushing changes: {stderr}" + return "Successfully pushed and pulled changes!" def run(self): - # # check whether dvc is configured or not - # msg = "'cmf' is not configured.\nExecute 'cmf init' command." - # result = dvc_get_config() - # if len(result) == 0: - # return msg - - # current_directory = os.getcwd() - # mlmd_file_name = "./mlmd" - - # # check whether mlmd file exists or not - # if self.args.file_name: - # mlmd_file_name = self.args.file_name - # current_directory = os.path.dirname(self.args.file_name) - - # # checks if mlmd file is present in current directory or given directory - # if not os.path.exists(mlmd_file_name): - # return f"ERROR: {mlmd_file_name} doesn't exists in the {current_directory}." - # else: - # # creating cmfquery object - # query = cmfquery.CmfQuery(mlmd_file_name) - # # Put a check to see whether pipline exists or not - # pipeline_name = self.args.pipeline_name - # if not query.get_pipeline_id(pipeline_name) > 0: - # return f"ERROR: Pipeline {pipeline_name} doesn't exist!!" - - print("Executing cmf artifact push command..") - artifact_push_instance = CmdArtifactPush(self.args) - # print(artifact_push_instance.run().status) - if artifact_push_instance.run().status == "success": - print("Executing cmf metadata push command..") - metadata_push_instance = CmdMetadataPush(self.args) - if metadata_push_instance.run().status == "success": - print("Execution git push command..") - print(self.git_push()) - return self.git_push() + print("Executing cmf artifact push command..") + artifact_push_instance = CmdArtifactPush(self.args) + if artifact_push_instance.run().status == "success": + print("Executing cmf metadata push command..") + metadata_push_instance = CmdMetadataPush(self.args) + if metadata_push_instance.run().status == "success": + print("Execution git push command..") + print(self.git_push()) + return def add_parser(subparsers, parent_parser): @@ -144,6 +97,7 @@ def add_parser(subparsers, parent_parser): parser.add_argument( "-e", "--execution", + action="append", help="Specify Execution id.", default=None, metavar="", @@ -151,7 +105,8 @@ def add_parser(subparsers, parent_parser): parser.add_argument( "-t", - "--tensorboard",\ + "--tensorboard", + action="append", help="Specify path to tensorboard logs for the pipeline.", metavar="" ) diff --git a/cmflib/dvc_wrapper.py b/cmflib/dvc_wrapper.py index b68a35bf..80258795 100644 --- a/cmflib/dvc_wrapper.py +++ b/cmflib/dvc_wrapper.py @@ -492,3 +492,31 @@ def git_modify_remote_url(git_url) -> str: print(f"Unexpected {outs}") print(f"Unexpected {errs}") return commit + +# Pulling code from mlmd branch +def git_get_pull() -> str: + process = subprocess.Popen('git pull cmf_origin mlmd', + cwd=None, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout, stderr = process.communicate() + return ( + stdout.decode('utf-8').strip() if stdout else '', + stderr.decode('utf-8').strip() if stderr else '', + process.returncode + ) + +# Pusing code inside mlmd branch +def git_get_push() -> str: + process = subprocess.Popen('git push -u cmf_origin mlmd', + cwd=None, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout, stderr = process.communicate() + return ( + stdout.decode('utf-8').strip() if stdout else '', + stderr.decode('utf-8').strip() if stderr else '', + process.returncode + ) \ No newline at end of file From 1caa9c4ec193fb39ddf142c012924f866765af10 Mon Sep 17 00:00:00 2001 From: abhinav chobey Date: Mon, 30 Dec 2024 02:58:04 -0800 Subject: [PATCH 31/41] validaton added to artifact pull and metadata pull --- cmflib/commands/artifact/pull.py | 68 +++++++++++++++++--------------- cmflib/commands/metadata/pull.py | 34 ++++++++++++---- 2 files changed, 62 insertions(+), 40 deletions(-) diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index b7e7396b..34c56886 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -84,14 +84,11 @@ def extract_repo_args(self, type: str, name: str, url: str, current_directory: s # information from the user-supplied arguments. # url = Test-env:/home/user/local-storage/files/md5/06/d100ff3e04e2c87bf20f0feacc9034, # Second-env:/home/user/local-storage/files/md5/06/d100ff3e04e2c" - # s_url = Url without pipeline name - s_url = self.split_url_pipeline(url, self.args.pipeline_name) - + s_url = self.split_url_pipeline(url, self.args.pipeline_name[0]) # got url in the form of /home/user/local-storage/files/md5/06/d100ff3e04e2c # spliting url using '/' delimiter token = s_url.split("/") - # name = artifacts/model/model.pkl name = name.split(":")[0] if type == "minio": @@ -119,15 +116,12 @@ def extract_repo_args(self, type: str, name: str, url: str, current_directory: s elif type == "local": token_length = len(token) download_loc = current_directory + "/" + name - # local artifact repo path = local-storage/files/md5/23/69v2uu3jeejjeiw. # token is a list = ['local-storage', 'files', 'md5', '23', '69v2uu3jeejjeiw'] # get last 4 element inside token token = token[(token_length-4):] - # join last 4 token using '/' delimiter current_dvc_loc = "/".join(token) - return current_dvc_loc, download_loc elif type == "ssh": @@ -169,11 +163,11 @@ def search_artifact(self, input_dict): continue # Splitting the 'name' using ':' as the delimiter and storing the first argument in the 'name' variable. name = name.split(":")[0] - artifact_hash = name = name.split(":")[1] + #artifact_hash = name = name.split(":")[1] # Splitting the path on '/' to extract the file name, excluding the directory structure. file_name = name.split('/')[-1] - if file_name == self.args.artifact_name: - return name, url, artifact_hash + if file_name == self.args.artifact_name[0]: + return name, url else: pass @@ -183,26 +177,40 @@ def run(self): # pipeline_name = self.args.pipeline_name current_directory = os.getcwd() mlmd_file_name = "./mlmd" - if self.args.file_name: - mlmd_file_name = self.args.file_name + if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). + mlmd_file_name = "./mlmd" # Default path for mlmd file name. + elif len(self.args.file_name) > 1: # If the user provided more than one file name. + raise DuplicateArgumentNotAllowed("file_name", "-f") + elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). + raise MissingArgument("file name") + else: + mlmd_file_name = self.args.file_name[0].strip() if mlmd_file_name == "mlmd": mlmd_file_name = "./mlmd" - print("mlmd_file_name",mlmd_file_name) - current_directory = os.path.dirname(mlmd_file_name) + current_directory = os.path.dirname(mlmd_file_name) + if not self.args.artifact_name: # If self.args.artifact_name[0] is None or an empty list ([]). + pass + elif len(self.args.artifact_name) > 1: # If the user provided more than one artifact_name. + raise DuplicateArgumentNotAllowed("artifact_name", "-a") + elif not self.args.artifact_name[0]: # self.args.artifact_name[0] is an empty string (""). + raise MissingArgument("artifact name") + if not os.path.exists(mlmd_file_name): #checking if MLMD files exists raise FileNotFound(mlmd_file_name, current_directory) query = cmfquery.CmfQuery(mlmd_file_name) - if not query.get_pipeline_id(self.args.pipeline_name) > 0: #checking if pipeline name exists in mlmd + if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: + raise DuplicateArgumentNotAllowed("pipeline_name", "-p") + elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). + raise MissingArgument("pipeline name") + elif not query.get_pipeline_id(self.args.pipeline_name[0]) > 0: #checking if pipeline name exists in mlmd raise PipelineNotFound(self.args.pipeline_name) # getting all pipeline stages[i.e Prepare, Featurize, Train and Evaluate] - stages = query.get_pipeline_stages(self.args.pipeline_name) + stages = query.get_pipeline_stages(self.args.pipeline_name[0]) executions = [] identifiers = [] - print("stages:",stages) for stage in stages: # getting all executions for stages executions = query.get_all_executions_in_stage(stage) - print("executions:",executions) # check if stage has executions if len(executions) > 0: # converting it to dictionary @@ -216,25 +224,21 @@ def run(self): name_url_dict = {} if len(identifiers) == 0: # check if there are no executions raise ExecutionsNotFound() - print("identifiers: ", identifiers) for identifier in identifiers: get_artifacts = query.get_all_artifacts_for_execution( identifier ) # getting all artifacts with id - print("get_artifacts: ",get_artifacts) temp_dict = dict(zip(get_artifacts['name'], get_artifacts['url'])) # getting dictionary of name and url pair name_url_dict.update(temp_dict) # updating name_url_dict with temp_dict #print(name_url_dict) # name_url_dict = ('artifacts/parsed/test.tsv:6f597d341ceb7d8fbbe88859a892ef81', 'Test-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81' # name_url_dict = ('artifacts/parsed/test.tsv:6f597d341ceb7d8fbbe88859a892ef81', 'Test-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81,Second-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81') - # print('i am here') output = DvcConfig.get_dvc_config() # pulling dvc config - if type(output) is not dict: raise CmfNotConfigured(output) """ There are multiple scenarios for cmf artifact pull - Code checks if self.args.artifact_name is provided by user or not + Code checks if self.args.artifact_name[0] is provided by user or not under these conditions there are two more conditions 1. if file is not .dir (single file) Download single file @@ -246,14 +250,14 @@ def run(self): if dvc_config_op["core.remote"] == "minio": minio_class_obj = minio_artifacts.MinioArtifacts(dvc_config_op) # Check if a specific artifact name is provided as input. - if self.args.artifact_name: + if self.args.artifact_name[0]: # Search for the artifact in the metadata store. output = self.search_artifact(name_url_dict) # output[0] = artifact_name # output[1] = url # output[2] = hash if output is None: - raise ArtifactNotFound(self.args.artifact_name) + raise ArtifactNotFound(self.args.artifact_name[0]) else: # Extract repository arguments specific to MinIO. minio_args = self.extract_repo_args("minio", output[0], output[1], current_directory) @@ -352,9 +356,8 @@ def run(self): output = self.search_artifact(name_url_dict) # output[0] = name # output[1] = url - if output is None: - raise ArtifactNotFound(self.args.artifact_name) + raise ArtifactNotFound(self.args.artifact_name[0]) else: # Extract repository arguments specific to Local repo. local_args = self.extract_repo_args("local", output[0], output[1], current_directory) @@ -439,7 +442,7 @@ def run(self): # output[0] = name # output[1] = url if output is None: - raise ArtifactNotFound(self.args.artifact_name) + raise ArtifactNotFound(self.args.artifact_name[0]) else: # Extract repository arguments specific to ssh-remote. args = self.extract_repo_args("ssh", output[0], output[1], current_directory) @@ -547,7 +550,7 @@ def run(self): # output[1] = url # output[3]=artifact_hash if output is None: - raise ArtifactNotFound(self.args.artifact_name) + raise ArtifactNotFound(self.args.artifact_name[0]) else: args = self.extract_repo_args("osdf", output[0], output[1], current_directory) download_flag, message = osdfremote_class_obj.download_artifacts( @@ -603,7 +606,7 @@ def run(self): # output[0] = name # output[1] = url if output is None: - raise ArtifactNotFound(self.args.artifact_name) + raise ArtifactNotFound(self.args.artifact_name[0]) else: args = self.extract_repo_args("amazons3", output[0], output[1], current_directory) if args[0] and args[1] and args[2]: @@ -694,16 +697,17 @@ def add_parser(subparsers, parent_parser): "-p", "--pipeline_name", required=True, + action="append", help="Specify Pipeline name.", metavar="", ) parser.add_argument( - "-f", "--file_name", help="Specify mlmd file name.", metavar="" + "-f", "--file_name", action="append", help="Specify mlmd file name.", metavar="" ) parser.add_argument( - "-a", "--artifact_name", help="Specify artifact name.", metavar="" + "-a", "--artifact_name", action="append", help="Specify artifact name.", metavar="" ) parser.set_defaults(func=CmdArtifactPull) diff --git a/cmflib/commands/metadata/pull.py b/cmflib/commands/metadata/pull.py index 3540a24d..8ec03dc0 100644 --- a/cmflib/commands/metadata/pull.py +++ b/cmflib/commands/metadata/pull.py @@ -23,7 +23,9 @@ from cmflib.server_interface import server_interface from cmflib.utils.cmf_config import CmfConfig from cmflib.cmf_exception_handling import ( + DuplicateArgumentNotAllowed, PipelineNotFound, + MissingArgument, CmfNotConfigured, ExecutionIDNotFound, MlmdNotFoundOnServer, MlmdFilePullSuccess, @@ -55,13 +57,27 @@ def run(self): cmd = "pull" status = 0 exec_id = None + if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: + raise DuplicateArgumentNotAllowed("pipeline_name", "-p") + elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). + raise MissingArgument("pipeline name") + if not self.args.execution: # If self.args.execution[0] is None or an empty list ([]). + pass + elif len(self.args.execution) > 1: # If the user provided more than one execution id. + raise DuplicateArgumentNotAllowed("execution id", "-e") + elif not self.args.execution[0]: # self.args.execution[0] is an empty string (""). + raise MissingArgument("execution id") if self.args.file_name: # setting directory where mlmd file will be dumped - if not os.path.isdir(self.args.file_name): - temp = os.path.dirname(self.args.file_name) + if len(self.args.file_name) > 1: # If the user provided more than one file name. + raise DuplicateArgumentNotAllowed("file_name", "-f") + elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). + raise MissingArgument("file name") + if not os.path.isdir(self.args.file_name[0]): + temp = os.path.dirname(self.args.file_name[0]) if temp != "": current_directory = temp if os.path.exists(current_directory): - full_path_to_dump = self.args.file_name + full_path_to_dump = self.args.file_name[0] else: raise DirectoryNotfound(current_dir= current_directory) else: @@ -69,14 +85,14 @@ def run(self): else: full_path_to_dump = os.getcwd() + "/mlmd" if self.args.execution: - exec_id = self.args.execution + exec_id = self.args.execution[0] output = server_interface.call_mlmd_pull( - url, self.args.pipeline_name, exec_id + url, self.args.pipeline_name[0], exec_id ) # calls cmf-server api to get mlmd file data(Json format) status = output.status_code # checks If given pipeline does not exists/ elif pull mlmd file/ else mlmd file is not available if output.content.decode() == None: - raise PipelineNotFound(self.args.pipeline_name) + raise PipelineNotFound(self.args.pipeline_name[0]) elif output.content.decode() == "no_exec_id": raise ExecutionIDNotFound(exec_id) @@ -93,7 +109,7 @@ def run(self): elif status == 413: raise MlmdNotFoundOnServer elif status == 406: - raise PipelineNotFound(self.args.pipeline_name) + raise PipelineNotFound(self.args.pipeline_name[0]) elif status == 404: raise CmfServerNotAvailable elif status == 500: @@ -117,6 +133,7 @@ def add_parser(subparsers, parent_parser): "-p", "--pipeline_name", required=True, + action="append", help="Specify Pipeline name.", metavar="", ) @@ -124,12 +141,13 @@ def add_parser(subparsers, parent_parser): parser.add_argument( "-f", "--file_name", + action="append", help="Specify mlmd file name with full path.", metavar="", ) parser.add_argument( - "-e", "--execution", help="Specify Execution id", metavar="" + "-e", "--execution", action="append", help="Specify Execution id", metavar="" ) parser.set_defaults(func=CmdMetadataPull) From 9e5cffa6575015404b73a36b77969b67046317f0 Mon Sep 17 00:00:00 2001 From: AyeshaSanadi Date: Tue, 31 Dec 2024 13:42:21 +0530 Subject: [PATCH 32/41] Added description for command --- cmflib/cmf.py | 44 ++++++++++++++++++++ cmflib/cmf_commands_wrapper.py | 40 ++++++++++++++++++ cmflib/commands/artifact/pull.py | 46 +++++++++++---------- cmflib/commands/metadata/pull.py | 3 -- cmflib/commands/metadata/push.py | 2 - cmflib/commands/repo/__init__.py | 5 ++- cmflib/commands/repo/pull.py | 71 +++++++++++--------------------- cmflib/commands/repo/push.py | 15 ++++--- 8 files changed, 144 insertions(+), 82 deletions(-) diff --git a/cmflib/cmf.py b/cmflib/cmf.py index be181c4c..fe282b63 100644 --- a/cmflib/cmf.py +++ b/cmflib/cmf.py @@ -71,6 +71,8 @@ _artifact_list, _pipeline_list, _execution_list, + _repo_push, + _repo_pull, ) class Cmf: @@ -2369,3 +2371,45 @@ def artifact_list(pipeline_name: str, filepath = "./mlmd", artifact_name: str = # Optional arguments: filepath( path to store mlmd file), artifact_name output = _artifact_list(pipeline_name, filepath, artifact_name) return output + + +def repo_push(pipeline_name: str, filepath = "./mlmd", tensorboard_path: str = "", execution_id: str = ""): + """ Push artifacts, metadata files, and source code to the user's artifact repository, cmf-server, and git respectively. + Example: + ```python + result = _repo_push("example_pipeline", "./mlmd_directory", "example_execution_id", "./tensorboard_path") + ``` + Args: + pipeline_name: Name of the pipeline. + filepath: Path to store the mlmd file. + execution_id: Executions for particular execution id. + tensorboard_path: Path to tensorboard logs. + Returns: + Output from the _repo_push function. + """ + + # Required arguments: pipeline_name + # Optional arguments: filepath, execution_id, tensorboard_path + output = _repo_push(pipeline_name, filepath, execution_id, tensorboard_path) + return output + + +def repo_pull(pipeline_name: str, filepath = "./mlmd", artifact_name: str = "", execution_id: str = ""): + """ Pull artifacts, metadata files, and source code from the user's artifact repository, cmf-server, and git respectively. + Example: + ```python + result = _repo_pull("example_pipeline", "./mlmd_directory", "example_artifact_name", "example_execution_id") + ``` + Args: + pipeline_name: Name of the pipeline. + filepath: Path to store the mlmd file. + artifact_name: Artifacts for particular artifact name. + execution_id: Executions for particular execution id. + Returns: + Output from the _repo_pull function. + """ + + # Required arguments: pipeline_name + # Optional arguments: filepath, artifact_name, execution_id + output = _repo_pull(pipeline_name, filepath, artifact_name, execution_id) + return output \ No newline at end of file diff --git a/cmflib/cmf_commands_wrapper.py b/cmflib/cmf_commands_wrapper.py index 4e2c5325..23090af6 100644 --- a/cmflib/cmf_commands_wrapper.py +++ b/cmflib/cmf_commands_wrapper.py @@ -334,3 +334,43 @@ def _execution_list(pipeline_name, file_name, execution_id): print(msg) return msg +def _repo_push(pipeline_name, file_name, tensorboard_path, execution_id): + cli_args = cli.parse_args( + [ + "repo", + "push", + "-p", + pipeline_name, + "-f", + file_name, + "-e", + execution_id, + "-t", + tensorboard_path + ] + ) + cmd = cli_args.func(cli_args) + msg = cmd.do_run() + print(msg) + return msg + +def _repo_pull(pipeline_name, file_name, artifact_name, execution_id): + cli_args = cli.parse_args( + [ + "execution", + "list", + "-p", + pipeline_name, + "-f", + file_name, + "-a", + artifact_name, + "-e", + execution_id + ] + ) + cmd = cli_args.func(cli_args) + msg = cmd.do_run() + print(msg) + return msg + diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index 34c56886..6bcc6715 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -172,6 +172,10 @@ def search_artifact(self, input_dict): pass def run(self): + output = DvcConfig.get_dvc_config() # pulling dvc config + if type(output) is not dict: + raise CmfNotConfigured(output) + # check whether 'mlmd' file exist in current directory # or in the directory provided by user # pipeline_name = self.args.pipeline_name @@ -180,16 +184,17 @@ def run(self): if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. elif len(self.args.file_name) > 1: # If the user provided more than one file name. - raise DuplicateArgumentNotAllowed("file_name", "-f") + raise DuplicateArgumentNotAllowed("file_name", "-f") elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). - raise MissingArgument("file name") + raise MissingArgument("file name") else: mlmd_file_name = self.args.file_name[0].strip() - if mlmd_file_name == "mlmd": - mlmd_file_name = "./mlmd" + if "/" not in mlmd_file_name: + mlmd_file_name = "./"+mlmd_file_name current_directory = os.path.dirname(mlmd_file_name) + if not self.args.artifact_name: # If self.args.artifact_name[0] is None or an empty list ([]). - pass + pass elif len(self.args.artifact_name) > 1: # If the user provided more than one artifact_name. raise DuplicateArgumentNotAllowed("artifact_name", "-a") elif not self.args.artifact_name[0]: # self.args.artifact_name[0] is an empty string (""). @@ -198,12 +203,14 @@ def run(self): if not os.path.exists(mlmd_file_name): #checking if MLMD files exists raise FileNotFound(mlmd_file_name, current_directory) query = cmfquery.CmfQuery(mlmd_file_name) + if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: raise DuplicateArgumentNotAllowed("pipeline_name", "-p") elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). raise MissingArgument("pipeline name") elif not query.get_pipeline_id(self.args.pipeline_name[0]) > 0: #checking if pipeline name exists in mlmd raise PipelineNotFound(self.args.pipeline_name) + # getting all pipeline stages[i.e Prepare, Featurize, Train and Evaluate] stages = query.get_pipeline_stages(self.args.pipeline_name[0]) executions = [] @@ -220,6 +227,7 @@ def run(self): identifiers.append(id) else: print("No Executions found for " + stage + " stage.") + # created dictionary name_url_dict = {} if len(identifiers) == 0: # check if there are no executions @@ -233,9 +241,6 @@ def run(self): #print(name_url_dict) # name_url_dict = ('artifacts/parsed/test.tsv:6f597d341ceb7d8fbbe88859a892ef81', 'Test-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81' # name_url_dict = ('artifacts/parsed/test.tsv:6f597d341ceb7d8fbbe88859a892ef81', 'Test-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81,Second-env:/home/sharvark/local-storage/6f/597d341ceb7d8fbbe88859a892ef81') - output = DvcConfig.get_dvc_config() # pulling dvc config - if type(output) is not dict: - raise CmfNotConfigured(output) """ There are multiple scenarios for cmf artifact pull Code checks if self.args.artifact_name[0] is provided by user or not @@ -275,7 +280,7 @@ def run(self): # Return success if the file is downloaded successfully. return ObjectDownloadSuccess(object_name, download_loc) else: - return ObjectDownloadFailure(object_name) + raise ObjectDownloadFailure(object_name) else: # If object name ends with `.dir`, download multiple files from a directory # return total_files_in_directory, files_downloaded @@ -292,7 +297,7 @@ def run(self): else: # Calculate the number of files that failed to download. file_failed_to_download = total_files_in_directory - dir_files_downloaded - return BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) + raise BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) else: # Handle the case where no specific artifact name is provided. @@ -341,7 +346,7 @@ def run(self): if not files_failed_to_download: return BatchDownloadSuccess(files_downloaded) else: - return BatchDownloadFailure(files_downloaded, files_failed_to_download) + raise BatchDownloadFailure(files_downloaded, files_failed_to_download) elif dvc_config_op["core.remote"] == "local-storage": local_class_obj = local_artifacts.LocalArtifacts(dvc_config_op) @@ -371,7 +376,7 @@ def run(self): # Return success if the file is downloaded successfully. return ObjectDownloadSuccess(object_name, download_loc) else: - return ObjectDownloadFailure(object_name) + raise ObjectDownloadFailure(object_name) else: # If object name ends with `.dir`, download multiple files from a directory @@ -384,7 +389,7 @@ def run(self): else: # Calculate the number of files that failed to download. file_failed_to_download = total_files_in_directory - dir_files_downloaded - return BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) + raise BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) else: # Handle the case where no specific artifact name is provided. files_downloaded = 0 @@ -430,7 +435,7 @@ def run(self): if not files_failed_to_download: return BatchDownloadSuccess(files_downloaded) else: - return BatchDownloadFailure( + raise BatchDownloadFailure( files_downloaded, files_failed_to_download) elif dvc_config_op["core.remote"] == "ssh-storage": @@ -459,7 +464,7 @@ def run(self): # Return success if the file is downloaded successfully. return ObjectDownloadSuccess(object_name, download_loc) else: - return ObjectDownloadFailure(object_name) + raise ObjectDownloadFailure(object_name) else: # If object name ends with `.dir`, download multiple files from a directory @@ -476,7 +481,7 @@ def run(self): else: # Calculate the number of files that failed to download. file_failed_to_download = total_files_in_directory - dir_files_downloaded - return BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) + raise BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) else: # Handle the case where no specific artifact name is provided. files_downloaded = 0 @@ -520,7 +525,7 @@ def run(self): if not files_failed_to_download: return BatchDownloadSuccess(files_downloaded) else: - return BatchDownloadFailure(files_downloaded, files_failed_to_download) + raise BatchDownloadFailure(files_downloaded, files_failed_to_download) elif dvc_config_op["core.remote"] == "osdf": #Regenerate Token for OSDF from cmflib.utils.helper_functions import generate_osdf_token @@ -631,7 +636,7 @@ def run(self): return BatchDownloadSuccess(dir_files_downloaded) else: file_failed_to_download = total_files_in_directory - dir_files_downloaded - return BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) + raise BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) else: @@ -673,11 +678,11 @@ def run(self): if not files_failed_to_download: return BatchDownloadSuccess(files_downloaded) else: - return BatchDownloadFailure(files_downloaded, files_failed_to_download) + raise BatchDownloadFailure(files_downloaded, files_failed_to_download) else: remote = dvc_config_op["core.remote"] msg = f"{remote} is not valid artifact repository for CMF.\n Reinitialize CMF." - return msg + raise MsgFailure(msg_str=msg) def add_parser(subparsers, parent_parser): @@ -711,4 +716,3 @@ def add_parser(subparsers, parent_parser): ) parser.set_defaults(func=CmdArtifactPull) - diff --git a/cmflib/commands/metadata/pull.py b/cmflib/commands/metadata/pull.py index 8ec03dc0..417e0b37 100644 --- a/cmflib/commands/metadata/pull.py +++ b/cmflib/commands/metadata/pull.py @@ -38,9 +38,6 @@ # This class pulls mlmd file from cmf-server class CmdMetadataPull(CmdBase): - - def __init__(self, args): - self.args = args def run(self): cmfconfig = os.environ.get("CONFIG_FILE", ".cmfconfig") diff --git a/cmflib/commands/metadata/push.py b/cmflib/commands/metadata/push.py index 7d5cbc64..f1c07d8e 100644 --- a/cmflib/commands/metadata/push.py +++ b/cmflib/commands/metadata/push.py @@ -118,8 +118,6 @@ def run(self): if execution_flag == 0: raise ExecutionIDNotFound(exec_id) status_code = response.status_code - print("status_code:",status_code) - print("response.json:",response.json()['status']) if status_code == 200: output = "" display_output = "" diff --git a/cmflib/commands/repo/__init__.py b/cmflib/commands/repo/__init__.py index a92a9f15..3fefb0d8 100644 --- a/cmflib/commands/repo/__init__.py +++ b/cmflib/commands/repo/__init__.py @@ -23,12 +23,13 @@ # This parser adds positional arguments to the main parser def add_parser(subparsers, parent_parser): - REPO_HELP = "Command for repo push." + REPO_HELP = "Push and pull artifacts, metadata files, and source code to and from the user's artifact repository, cmf-server, and git respectively." + metadata_parser = subparsers.add_parser( "repo", parents=[parent_parser], - description="Command repo push.", + description="Push and pull artifacts, metadata files, and source code to and from the user's artifact repository, cmf-server, and git respectively.", help=REPO_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) diff --git a/cmflib/commands/repo/pull.py b/cmflib/commands/repo/pull.py index f4fc310c..1aee14fa 100644 --- a/cmflib/commands/repo/pull.py +++ b/cmflib/commands/repo/pull.py @@ -16,29 +16,16 @@ #!/usr/bin/env python3 import argparse -import os -import subprocess import requests from cmflib.cli.command import CmdBase -from cmflib.dvc_wrapper import dvc_get_config, git_get_repo, git_checkout_new_branch +from cmflib.dvc_wrapper import git_get_repo, git_get_pull from cmflib.commands.artifact.pull import CmdArtifactPull from cmflib.commands.metadata.pull import CmdMetadataPull +from cmflib.cmf_exception_handling import MsgSuccess, MsgFailure class CmdRepoPull(CmdBase): - def __init__(self, args): - self.args = args - - def run_command(self, command, cwd=None): - process = subprocess.Popen(command, cwd=cwd, shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - stdout, stderr = process.communicate() - return (stdout.decode('utf-8').strip() if stdout else '', - stderr.decode('utf-8').strip() if stderr else '', - process.returncode) - def branch_exists(self, repo_own, repo_name, branch_name): url = f"https://api.github.com/repos/{repo_own}/{repo_name}/branches/{branch_name}" res = requests.get(url) @@ -52,50 +39,32 @@ def git_pull(self): url = url.split("/") # whether branch exists in git repo or not if self.branch_exists(url[-2], url[-1], "mlmd"): - print("branch exists") # git pull print("git pull started...") - stdout, stderr, returncode = self.run_command("git pull cmf_origin mlmd") - print(stdout) + stdout, stderr, returncode = git_get_pull() if returncode != 0: - return f"Error pulling changes: {stderr}" - return stdout + raise MsgFailure(msg_str=f"Error pulling changes: {stderr}") + return MsgSuccess(msg_str=stdout) else: - return "mlmd branch is not exists in github..." + return MsgSuccess(msg_str="mlmd branch does not exists inside github...") def run(self): - # check whether dvc is configured or not - # msg = "'cmf' is not configured.\nExecute 'cmf init' command." - # result = dvc_get_config() - # if len(result) == 0: - # return msg - - # current_directory = os.getcwd() - # mlmd_file_name = "./mlmd" - # if self.args.file_name: - # mlmd_file_name = self.args.file_name - # if mlmd_file_name == "mlmd": - # mlmd_file_name = "./mlmd" - # current_directory = os.path.dirname(mlmd_file_name) - - # if not os.path.exists(mlmd_file_name): - # return f"ERROR: {mlmd_file_name} doesn't exists in {current_directory} directory." - # else: + print("metadata pull started...") + instance_of_metadata = CmdMetadataPull(self.args) + if instance_of_metadata.run().status == "success": + print("artifact pull started...") instance_of_artifact = CmdArtifactPull(self.args) if instance_of_artifact.run().status == "success": - print("metadata pull started...") - instance_of_metadata = CmdMetadataPull(self.args) - if instance_of_metadata.run().status == "success": - return self.git_pull() + return self.git_pull() def add_parser(subparsers, parent_parser): - PULL_HELP = "Pull user-generated mlmd to server to create one single mlmd file for all the pipelines." + PULL_HELP = "Pull artifacts, metadata files, and source code from the user's artifact repository, cmf-server, and git respectively." parser = subparsers.add_parser( "pull", parents=[parent_parser], - description="Pull user's mlmd to cmf-server.", + description="Pull artifacts, metadata files, and source code from the user's artifact repository, cmf-server, and git respectively.", help=PULL_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) @@ -106,23 +75,33 @@ def add_parser(subparsers, parent_parser): "-p", "--pipeline_name", required=True, + action="append", help="Specify Pipeline name.", metavar="", ) parser.add_argument( - "-f", "--file_name", help="Specify mlmd file name.", metavar="" + "-f", + "--file_name", + action="append", + help="Specify mlmd file name.", + metavar="", ) parser.add_argument( "-e", "--execution", + action="append", help="Specify Execution id.", metavar="", ) parser.add_argument( - "-a", "--artifact_name", help="Specify artifact name.", metavar="" + "-a", + "--artifact_name", + action="append", + help="Specify artifact name.", + metavar="", ) parser.set_defaults(func=CmdRepoPull) diff --git a/cmflib/commands/repo/push.py b/cmflib/commands/repo/push.py index 3c9392f0..ebf4c3c1 100644 --- a/cmflib/commands/repo/push.py +++ b/cmflib/commands/repo/push.py @@ -22,6 +22,7 @@ from cmflib.dvc_wrapper import git_get_repo, git_get_pull, git_get_push from cmflib.commands.artifact.push import CmdArtifactPush from cmflib.commands.metadata.push import CmdMetadataPush +from cmflib.cmf_exception_handling import MsgSuccess, MsgFailure class CmdRepoPush(CmdBase): @@ -41,15 +42,14 @@ def git_push(self): # pull the code # push the code stdout, stderr, returncode = git_get_pull() - # print(returncode+"1") if returncode != 0: - return f"Error pulling changes: {stderr}" + raise MsgFailure(msg_str=f"Error pulling changes: {stderr}") print(stdout) # push the code stdout, stderr, returncode = git_get_push() if returncode != 0: - return f"Error pushing changes: {stderr}" - return "Successfully pushed and pulled changes!" + raise MsgFailure(msg_str=f"Error pushing changes: {stderr}") + return MsgSuccess(msg_str="Successfully pushed and pulled changes!") def run(self): @@ -60,17 +60,16 @@ def run(self): metadata_push_instance = CmdMetadataPush(self.args) if metadata_push_instance.run().status == "success": print("Execution git push command..") - print(self.git_push()) - return + return self.git_push() def add_parser(subparsers, parent_parser): - PUSH_HELP = "Push user-generated mlmd to server to create one single mlmd file for all the pipelines." + PUSH_HELP = "Push artifacts, metadata files, and source code to the user's artifact repository, cmf-server, and git respectively." parser = subparsers.add_parser( "push", parents=[parent_parser], - description="Push user's mlmd to cmf-server.", + description="Push artifacts, metadata files, and source code to the user's artifact repository, cmf-server, and git respectively.", help=PUSH_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) From 2f70b13fa7a870e324ef34d072fcd6fcac24e304 Mon Sep 17 00:00:00 2001 From: AyeshaSanadi <167299982+AyeshaSanadi@users.noreply.github.com> Date: Tue, 31 Dec 2024 14:00:21 +0530 Subject: [PATCH 33/41] Added description for cmf repo command inside document. --- docs/cmf_client/cmf_client.md | 46 ++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/docs/cmf_client/cmf_client.md b/docs/cmf_client/cmf_client.md index ed13bb9a..a8e8cbf9 100644 --- a/docs/cmf_client/cmf_client.md +++ b/docs/cmf_client/cmf_client.md @@ -2,7 +2,7 @@ # cmf ``` -Usage: cmf [-h] {init, artifact, metadata, execution, pipeline} +Usage: cmf [-h] {init, artifact, metadata, execution, pipeline, repo} ``` The `cmf` command is a comprehensive tool designed to initialize an artifact repository and perform various operations on artifacts, execution, pipeline and metadata. @@ -380,3 +380,47 @@ Optional Arguments -h, --help show this help message and exit. --f [file_name], --file-name [file_name] Specify the absolute or relative path for the input MLMD file. ``` + +## cmf repo +``` +Usage: cmf repo [-h] {push, pull} +``` +`cmf repo` command push and pull artifacts, metadata files, and source code to and from the user's artifact repository, cmf-server, and git respectively. +### cmf repo push +``` +Usage: cmf repo push [-h] -p [pipeline_name] -f [file_name] -e [exec_id] -t [tensorboard] +``` +`cmf repo push` command push artifacts, metadata files, and source code to the user's artifact repository, cmf-server, and git respectively. +``` +cmf repo push -p 'pipeline-name' -f '/path/to/mlmd-file-name' -e 'execution_id' -t 'tensorboard_log_path' +``` +Required Arguments +``` + -p [pipeline_name], --pipeline-name [pipeline_name] Specify Pipeline name. +``` +Optional Arguments +``` + -h, --help show this help message and exit. + -f [file_name], --file-name [file_name] Specify mlmd file name. + -e [exec_id], --execution [exec_id] Specify execution id. + -t [tensorboard], --tensorboard [tensorboard] Specify path to tensorboard logs for the pipeline. +``` +### cmf repo pull +``` +Usage: cmf repo pull [-h] -p [pipeline_name] -f [file_name] -e [exec_id] -a [artifact_name] +``` +`cmf repo pull` command pull artifacts, metadata files, and source code from the user's artifact repository, cmf-server, and git respectively. +``` +cmf repo pull -p 'pipeline-name' -f '/path/to/mlmd-file-name' -e 'execution_id' -a 'artifact_name' +``` +Required Arguments +``` + -p [pipeline_name], --pipeline-name [pipeline_name] Specify Pipeline name. +``` +Optional Arguments +``` + -h, --help show this help message and exit. + -f [file_name], --file-name [file_name] Specify mlmd file name. + -e [exec_id], --execution [exec_id] Specify execution id. + -a [artifact_name], --artifact_name [artifact_name] Specify the artifact name. +``` From 4bf170e3ee84c283c49e0b3cb934be21e896fbd9 Mon Sep 17 00:00:00 2001 From: AyeshaSanadi Date: Wed, 1 Jan 2025 14:25:30 +0530 Subject: [PATCH 34/41] Added comment repo pull/push file --- cmflib/commands/repo/pull.py | 22 ++++++++++++++++++---- cmflib/commands/repo/push.py | 24 +++++++++++++++++++----- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/cmflib/commands/repo/pull.py b/cmflib/commands/repo/pull.py index 1aee14fa..a129cd01 100644 --- a/cmflib/commands/repo/pull.py +++ b/cmflib/commands/repo/pull.py @@ -26,7 +26,18 @@ class CmdRepoPull(CmdBase): - def branch_exists(self, repo_own, repo_name, branch_name): + def branch_exists(self, repo_own: str, repo_name: str, branch_name: str) -> bool: + """ + Check if a branch exists in a GitHub repository. + + Args: + repo_owner: The owner of the GitHub repository. + repo_name: The name of the GitHub repository. + branch_name: The name of the branch to check. + + Returns: + bool: True if the branch exists, otherwise False. + """ url = f"https://api.github.com/repos/{repo_own}/{repo_name}/branches/{branch_name}" res = requests.get(url) @@ -35,18 +46,21 @@ def branch_exists(self, repo_own, repo_name, branch_name): return False def git_pull(self): + # Getting github url from cmf init command url = git_get_repo() + # Example url = https://github.com/ABC/my-repo url = url.split("/") - # whether branch exists in git repo or not + # Check whether branch exists in git repo or not + # url[-2] = ABC, url-1] = my-repo if self.branch_exists(url[-2], url[-1], "mlmd"): - # git pull + # pull the code from mlmd branch print("git pull started...") stdout, stderr, returncode = git_get_pull() if returncode != 0: raise MsgFailure(msg_str=f"Error pulling changes: {stderr}") return MsgSuccess(msg_str=stdout) else: - return MsgSuccess(msg_str="mlmd branch does not exists inside github...") + raise MsgFailure(msg_str="Branch 'mlmd' does not exists!!") def run(self): print("metadata pull started...") diff --git a/cmflib/commands/repo/push.py b/cmflib/commands/repo/push.py index ebf4c3c1..918c075c 100644 --- a/cmflib/commands/repo/push.py +++ b/cmflib/commands/repo/push.py @@ -26,7 +26,18 @@ class CmdRepoPush(CmdBase): - def branch_exists(self, repo_own, repo_name, branch_name): + def branch_exists(self, repo_own: str, repo_name: str, branch_name: str) -> bool: + """ + Check if a branch exists in a GitHub repository. + + Args: + repo_owner: The owner of the GitHub repository. + repo_name: The name of the GitHub repository. + branch_name: The name of the branch to check. + + Returns: + bool: True if the branch exists, otherwise False. + """ url = f"https://api.github.com/repos/{repo_own}/{repo_name}/branches/{branch_name}" res = requests.get(url) @@ -35,17 +46,20 @@ def branch_exists(self, repo_own, repo_name, branch_name): return False def git_push(self): + # Getting github url from cmf init command url = git_get_repo() + # Example url = https://github.com/ABC/my-repo url = url.split("/") - # whether branch exists in git repo or not + # Check whether branch exists in git repo or not + # url[-2] = ABC, url-1] = my-repo if self.branch_exists(url[-2], url[-1], "mlmd"): - # pull the code - # push the code + # 1. pull the code from mlmd branch + # 2. push the code inside mlmd branch stdout, stderr, returncode = git_get_pull() if returncode != 0: raise MsgFailure(msg_str=f"Error pulling changes: {stderr}") print(stdout) - # push the code + # push the code inside mlmd branch stdout, stderr, returncode = git_get_push() if returncode != 0: raise MsgFailure(msg_str=f"Error pushing changes: {stderr}") From 22fb2d4f45e5b2bea489b62af112992fc4853514 Mon Sep 17 00:00:00 2001 From: abhinav chobey Date: Thu, 9 Jan 2025 05:10:46 -0800 Subject: [PATCH 35/41] added missing argmuents and duplicate arguments to init --- cmflib/commands/init/amazonS3.py | 66 ++++++++++++++++++++++--------- cmflib/commands/init/local.py | 43 +++++++++++++++----- cmflib/commands/init/minioS3.py | 53 +++++++++++++++++++------ cmflib/commands/init/sshremote.py | 54 +++++++++++++++++++------ 4 files changed, 165 insertions(+), 51 deletions(-) diff --git a/cmflib/commands/init/amazonS3.py b/cmflib/commands/init/amazonS3.py index ba254f2a..b5abb1a5 100644 --- a/cmflib/commands/init/amazonS3.py +++ b/cmflib/commands/init/amazonS3.py @@ -31,41 +31,61 @@ ) from cmflib.utils.cmf_config import CmfConfig from cmflib.utils.helper_functions import is_git_repo -from cmflib.cmf_exception_handling import Neo4jArgumentNotProvided, CmfInitComplete, CmfInitFailed +from cmflib.cmf_exception_handling import Neo4jArgumentNotProvided, CmfInitComplete, CmfInitFailed, DuplicateArgumentNotAllowed, MissingArgument +import sys class CmdInitAmazonS3(CmdBase): def run(self): # Reading CONFIG_FILE variable cmf_config = os.environ.get("CONFIG_FILE", ".cmfconfig") + + required_args = { + "url": self.args.url, + "access-key-id": self.args.access_key_id, + "secret-key": self.args.secret_key, + "git-remote-url": self.args.git_remote_url, + "session-token" : self.args.session_token, + "cmf-server-url" : self.args.cmf_server_url, + "neo4j-user" : self.args.neo4j_user, + "neo4j-password" : self.args.neo4j_password, + "neo4j_uri" : self.args.neo4j_uri + + } + for arg_name, arg_value in required_args.items(): + if arg_value: + if arg_name == "cmf-server-url" and len(arg_value) > 2: + raise DuplicateArgumentNotAllowed(arg_name,("--"+arg_name)) + if arg_value[0] == "": + raise MissingArgument(arg_name) + elif len(arg_value) > 1: + raise DuplicateArgumentNotAllowed(arg_name,("--"+arg_name)) + # checking if config file exists if not os.path.exists(cmf_config): # writing default value to config file attr_dict = {} attr_dict["server-ip"] = "http://127.0.0.1:80" CmfConfig.write_config(cmf_config, "cmf", attr_dict) - # if user gave --cmf-server-ip, override the config file - if self.args.cmf_server_url: + if self.args.cmf_server_url: attr_dict = {} attr_dict["server-ip"] = self.args.cmf_server_url CmfConfig.write_config(cmf_config, "cmf", attr_dict, True) - # read --neo4j details and add to the exsting file if self.args.neo4j_user and self.args.neo4j_password and self.args.neo4j_uri: attr_dict = {} - attr_dict["user"] = self.args.neo4j_user - attr_dict["password"] = self.args.neo4j_password - attr_dict["uri"] = self.args.neo4j_uri + attr_dict["user"] = self.args.neo4j_user[0] + attr_dict["password"] = self.args.neo4j_password[0] + attr_dict["uri"] = self.args.neo4j_uri[0] CmfConfig.write_config(cmf_config, "neo4j", attr_dict, True) elif ( - not self.args.neo4j_user - and not self.args.neo4j_password - and not self.args.neo4j_uri + not self.args.neo4j_user[0] + and not self.args.neo4j_password[0] + and not self.args.neo4j_uri[0] ): pass else: raise Neo4jArgumentNotProvided - output = is_git_repo() if not output: branch_name = "master" @@ -73,23 +93,23 @@ def run(self): git_quiet_init() git_checkout_new_branch(branch_name) git_initial_commit() - git_add_remote(self.args.git_remote_url) + git_add_remote(self.args.git_remote_url[0]) print("git init complete.") else: - git_modify_remote_url(self.args.git_remote_url) + git_modify_remote_url(self.args.git_remote_url[0]) print("git init complete.") print("Starting cmf init.") dvc_quiet_init() repo_type = "amazons3" - output = dvc_add_remote_repo(repo_type, self.args.url) + output = dvc_add_remote_repo(repo_type, self.args.url[0]) if not output: raise CmfInitFailed print(output) - dvc_add_attribute(repo_type, "access_key_id", self.args.access_key_id) - dvc_add_attribute(repo_type, "secret_access_key", self.args.secret_key) - dvc_add_attribute(repo_type, "session_token", self.args.session_token) + dvc_add_attribute(repo_type, "access_key_id", self.args.access_key_id[0]) + dvc_add_attribute(repo_type, "secret_access_key", self.args.secret_key[0]) + dvc_add_attribute(repo_type, "session_token", self.args.session_token[0]) status = CmfInitComplete() return status @@ -110,6 +130,7 @@ def add_parser(subparsers, parent_parser): required_arguments.add_argument( "--url", required=True, + action="append", help="Specify Amazon S3 bucket url.", metavar="", default=argparse.SUPPRESS, @@ -118,6 +139,7 @@ def add_parser(subparsers, parent_parser): required_arguments.add_argument( "--access-key-id", required=True, + action="append", help="Specify Access Key Id.", metavar="", default=argparse.SUPPRESS, @@ -127,6 +149,7 @@ def add_parser(subparsers, parent_parser): "--secret-key", required=True, help="Specify Secret Key.", + action="append", metavar="", default=argparse.SUPPRESS, ) @@ -136,13 +159,14 @@ def add_parser(subparsers, parent_parser): required=True, help="Specify Session Token.", metavar="", - default="", + action="append", ) required_arguments.add_argument( "--git-remote-url", required=True, help="Specify git repo url. eg: https://github.com/XXX/example.git", + action="append", metavar="", default=argparse.SUPPRESS, ) @@ -151,24 +175,28 @@ def add_parser(subparsers, parent_parser): "--cmf-server-url", help="Specify cmf-server URL.", metavar="", - default="http://127.0.0.1:80", + action="append", + default=["http://127.0.0.1:80"], ) parser.add_argument( "--neo4j-user", help="Specify neo4j user.", metavar="", + action="append", # default=argparse.SUPPRESS, ) parser.add_argument( "--neo4j-password", help="Specify neo4j password.", metavar="", + action="append", # default=argparse.SUPPRESS, ) parser.add_argument( "--neo4j-uri", help="Specify neo4j uri.eg bolt://localhost:7687", metavar="", + action="append", # default=argparse.SUPPRESS, ) diff --git a/cmflib/commands/init/local.py b/cmflib/commands/init/local.py index 79ce5ec0..201c4eae 100644 --- a/cmflib/commands/init/local.py +++ b/cmflib/commands/init/local.py @@ -30,11 +30,31 @@ ) from cmflib.utils.cmf_config import CmfConfig from cmflib.utils.helper_functions import is_git_repo +from cmflib.cmf_exception_handling import MissingArgument, DuplicateArgumentNotAllowed class CmdInitLocal(CmdBase): def run(self): # Reading CONFIG_FILE variable cmf_config = os.environ.get("CONFIG_FILE", ".cmfconfig") + + required_args = { + "path": self.args.path, + "git-remote-url": self.args.git_remote_url, + "cmf-server-url" : self.args.cmf_server_url, + "neo4j-user" : self.args.neo4j_user, + "neo4j-password" : self.args.neo4j_password, + "neo4j_uri" : self.args.neo4j_uri + } + + for arg_name, arg_value in required_args.items(): + if arg_value: + if arg_name == "cmf-server-url" and len(arg_value) > 2: + raise DuplicateArgumentNotAllowed(arg_name,("--"+arg_name)) + if arg_value[0] == "": + raise MissingArgument(arg_name) + elif len(arg_value) > 1: + raise DuplicateArgumentNotAllowed(arg_name,("--"+arg_name)) + # checking if config file exists if not os.path.exists(cmf_config): # writing default value to config file @@ -51,14 +71,14 @@ def run(self): # read --neo4j details and add to the exsting file if self.args.neo4j_user and self.args.neo4j_password and self.args.neo4j_uri: attr_dict = {} - attr_dict["user"] = self.args.neo4j_user - attr_dict["password"] = self.args.neo4j_password - attr_dict["uri"] = self.args.neo4j_uri + attr_dict["user"] = self.args.neo4j_user[0] + attr_dict["password"] = self.args.neo4j_password[0] + attr_dict["uri"] = self.args.neo4j_uri[0] CmfConfig.write_config(cmf_config, "neo4j", attr_dict, True) elif ( - not self.args.neo4j_user - and not self.args.neo4j_password - and not self.args.neo4j_uri + not self.args.neo4j_user[0] + and not self.args.neo4j_password[0] + and not self.args.neo4j_uri[0] ): pass else: @@ -72,16 +92,16 @@ def run(self): git_quiet_init() git_checkout_new_branch(branch_name) git_initial_commit() - git_add_remote(self.args.git_remote_url) + git_add_remote(self.args.git_remote_url[0]) print("git init complete.") else: - git_modify_remote_url(self.args.git_remote_url) + git_modify_remote_url(self.args.git_remote_url[0]) print("git init complete.") print("Starting cmf init.") dvc_quiet_init() repo_type = "local-storage" - output = dvc_add_remote_repo(repo_type, self.args.path) + output = dvc_add_remote_repo(repo_type, self.args.path[0]) if not output: raise CmfInitFailed print(output) @@ -104,6 +124,7 @@ def add_parser(subparsers, parent_parser): required_arguments.add_argument( "--path", required=True, + action="append", help="Specify local directory path.", metavar="", default=argparse.SUPPRESS, @@ -112,6 +133,7 @@ def add_parser(subparsers, parent_parser): required_arguments.add_argument( "--git-remote-url", required=True, + action="append", help="Specify git repo url, eg: https://github.com/XXX/example.git", metavar="", # default=argparse.SUPPRESS @@ -128,18 +150,21 @@ def add_parser(subparsers, parent_parser): "--neo4j-user", help="Specify neo4j user.", metavar="", + action="append", # default=argparse.SUPPRESS, ) parser.add_argument( "--neo4j-password", help="Specify neo4j password.", metavar="", + action="append", # default=argparse.SUPPRESS, ) parser.add_argument( "--neo4j-uri", help="Specify neo4j uri. eg bolt://localhost:7687", metavar="", + action="append", # default=argparse.SUPPRESS, ) diff --git a/cmflib/commands/init/minioS3.py b/cmflib/commands/init/minioS3.py index 2c6b90ba..197e709a 100644 --- a/cmflib/commands/init/minioS3.py +++ b/cmflib/commands/init/minioS3.py @@ -32,11 +32,34 @@ from cmflib.utils.cmf_config import CmfConfig from cmflib.utils.helper_functions import is_git_repo from cmflib.cmf_exception_handling import Neo4jArgumentNotProvided, CmfInitComplete, CmfInitFailed +from cmflib.cmf_exception_handling import MissingArgument, DuplicateArgumentNotAllowed class CmdInitMinioS3(CmdBase): def run(self): # Reading CONFIG_FILE variable cmf_config = os.environ.get("CONFIG_FILE", ".cmfconfig") + + required_args = { + "url": self.args.url, + "endpoint-url": self.args.endpoint_url, + "access-key-id": self.args.access_key_id, + "secret-key": self.args.secret_key, + "git-remote-url": self.args.git_remote_url, + "cmf-server-url" : self.args.cmf_server_url, + "neo4j-user" : self.args.neo4j_user, + "neo4j-password" : self.args.neo4j_password, + "neo4j_uri" : self.args.neo4j_uri + } + + for arg_name, arg_value in required_args.items(): + if arg_value: + if arg_name == "cmf-server-url" and len(arg_value) > 2: + raise DuplicateArgumentNotAllowed(arg_name,("--"+arg_name)) + if arg_value[0] == "": + raise MissingArgument(arg_name) + elif len(arg_value) > 1: + raise DuplicateArgumentNotAllowed(arg_name,("--"+arg_name)) + # checking if config file exists if not os.path.exists(cmf_config): # writing default value to config file @@ -53,14 +76,14 @@ def run(self): # read --neo4j details and add to the exsting file if self.args.neo4j_user and self.args.neo4j_password and self.args.neo4j_uri: attr_dict = {} - attr_dict["user"] = self.args.neo4j_user - attr_dict["password"] = self.args.neo4j_password - attr_dict["uri"] = self.args.neo4j_uri + attr_dict["user"] = self.args.neo4j_user[0] + attr_dict["password"] = self.args.neo4j_password[0] + attr_dict["uri"] = self.args.neo4j_uri[0] CmfConfig.write_config(cmf_config, "neo4j", attr_dict, True) elif ( - not self.args.neo4j_user - and not self.args.neo4j_password - and not self.args.neo4j_uri + not self.args.neo4j_user[0] + and not self.args.neo4j_password[0] + and not self.args.neo4j_uri[0] ): pass else: @@ -72,10 +95,10 @@ def run(self): git_quiet_init() git_checkout_new_branch(branch_name) git_initial_commit() - git_add_remote(self.args.git_remote_url) + git_add_remote(self.args.git_remote_url[0]) print("git init complete.") else: - git_modify_remote_url(self.args.git_remote_url) + git_modify_remote_url(self.args.git_remote_url[0]) print("git init complete.") @@ -86,9 +109,9 @@ def run(self): if not output: raise CmfInitFailed print(output) - dvc_add_attribute(repo_type, "endpointurl", self.args.endpoint_url) - dvc_add_attribute(repo_type, "access_key_id", self.args.access_key_id) - dvc_add_attribute(repo_type, "secret_access_key", self.args.secret_key) + dvc_add_attribute(repo_type, "endpointurl", self.args.endpoint_url[0]) + dvc_add_attribute(repo_type, "access_key_id", self.args.access_key_id[0]) + dvc_add_attribute(repo_type, "secret_access_key", self.args.secret_key[0]) status = CmfInitComplete() return status @@ -110,6 +133,7 @@ def add_parser(subparsers, parent_parser): "--url", required=True, help="Specify Minio S3 bucket url.", + action="append", metavar="", default=argparse.SUPPRESS, ) @@ -118,6 +142,7 @@ def add_parser(subparsers, parent_parser): "--endpoint-url", required=True, help="Specify endpoint url which is used to access Minio's locally/remotely running UI.", + action="append", metavar="", default=argparse.SUPPRESS, ) @@ -127,6 +152,7 @@ def add_parser(subparsers, parent_parser): required=True, help="Specify Access Key Id.", metavar="", + action="append", default=argparse.SUPPRESS, ) @@ -134,6 +160,7 @@ def add_parser(subparsers, parent_parser): "--secret-key", required=True, help="Specify Secret Key.", + action="append", metavar="", default=argparse.SUPPRESS, ) @@ -143,6 +170,7 @@ def add_parser(subparsers, parent_parser): required=True, help="Specify git repo url. eg: https://github.com/XXX/example.git", metavar="", + action="append", default=argparse.SUPPRESS, ) @@ -157,18 +185,21 @@ def add_parser(subparsers, parent_parser): "--neo4j-user", help="Specify neo4j user.", metavar="", + action="append", # default=argparse.SUPPRESS, ) parser.add_argument( "--neo4j-password", help="Specify neo4j password.", metavar="", + action="append", # default=argparse.SUPPRESS, ) parser.add_argument( "--neo4j-uri", help="Specify neo4j uri.eg bolt://localhost:7687", metavar="", + action="append", # default=argparse.SUPPRESS, ) diff --git a/cmflib/commands/init/sshremote.py b/cmflib/commands/init/sshremote.py index ca1636fb..25f0428c 100644 --- a/cmflib/commands/init/sshremote.py +++ b/cmflib/commands/init/sshremote.py @@ -31,12 +31,34 @@ ) from cmflib.utils.cmf_config import CmfConfig from cmflib.utils.helper_functions import is_git_repo -from cmflib.cmf_exception_handling import Neo4jArgumentNotProvided, CmfInitComplete, CmfInitFailed +from cmflib.cmf_exception_handling import Neo4jArgumentNotProvided, CmfInitComplete, CmfInitFailed, DuplicateArgumentNotAllowed, MissingArgument class CmdInitSSHRemote(CmdBase): def run(self): # Reading CONFIG_FILE variable cmf_config = os.environ.get("CONFIG_FILE", ".cmfconfig") + + required_args = { + "path": self.args.path, + "user": self.args.user, + "port": self.args.port, + "password": self.args.password, + "git-remote-url": self.args.git_remote_url, + "cmf-server-url" : self.args.cmf_server_url, + "neo4j-user" : self.args.neo4j_user, + "neo4j-password" : self.args.neo4j_password, + "neo4j_uri" : self.args.neo4j_uri + } + + for arg_name, arg_value in required_args.items(): + if arg_value: + if arg_name == "cmf-server-url" and len(arg_value) > 2: + raise DuplicateArgumentNotAllowed(arg_name,("--"+arg_name)) + if arg_value[0] == "": + raise MissingArgument(arg_name) + elif len(arg_value) > 1: + raise DuplicateArgumentNotAllowed(arg_name,("--"+arg_name)) + # checking if config file exists if not os.path.exists(cmf_config): # writing default value to config file @@ -53,14 +75,14 @@ def run(self): # read --neo4j details and add to the exsting file if self.args.neo4j_user and self.args.neo4j_password and self.args.neo4j_uri: attr_dict = {} - attr_dict["user"] = self.args.neo4j_user - attr_dict["password"] = self.args.neo4j_password - attr_dict["uri"] = self.args.neo4j_uri + attr_dict["user"] = self.args.neo4j_user[0] + attr_dict["password"] = self.args.neo4j_password[0] + attr_dict["uri"] = self.args.neo4j_uri[0] CmfConfig.write_config(cmf_config, "neo4j", attr_dict, True) elif ( - not self.args.neo4j_user - and not self.args.neo4j_password - and not self.args.neo4j_uri + not self.args.neo4j_user[0] + and not self.args.neo4j_password[0] + and not self.args.neo4j_uri[0] ): pass else: @@ -72,19 +94,19 @@ def run(self): git_quiet_init() git_checkout_new_branch(branch_name) git_initial_commit() - git_add_remote(self.args.git_remote_url) + git_add_remote(self.args.git_remote_url[0]) print("git init complete.") print("Starting cmf init.") repo_type = "ssh-storage" dvc_quiet_init() - output = dvc_add_remote_repo(repo_type, self.args.path) + output = dvc_add_remote_repo(repo_type, self.args.path[0]) if not output: raise CmfInitFailed print(output) - dvc_add_attribute(repo_type, "user", self.args.user) - dvc_add_attribute(repo_type, "password", self.args.password) - dvc_add_attribute(repo_type, "port", self.args.port) + dvc_add_attribute(repo_type, "user", self.args.user[0]) + dvc_add_attribute(repo_type, "password", self.args.password[0]) + dvc_add_attribute(repo_type, "port", self.args.port[0]) status = CmfInitComplete() return status @@ -107,6 +129,7 @@ def add_parser(subparsers, parent_parser): required=True, help="Specify remote ssh directory path.", metavar="", + action="append", default=argparse.SUPPRESS, ) @@ -115,6 +138,7 @@ def add_parser(subparsers, parent_parser): required=True, help="Specify username.", metavar="", + action="append", default=argparse.SUPPRESS, ) @@ -123,6 +147,7 @@ def add_parser(subparsers, parent_parser): required=True, help="Specify port.", metavar="", + action="append", default=argparse.SUPPRESS, ) @@ -131,6 +156,7 @@ def add_parser(subparsers, parent_parser): required=True, help="Specify password. This will be saved only on local", metavar="", + action="append", default=argparse.SUPPRESS, ) @@ -139,6 +165,7 @@ def add_parser(subparsers, parent_parser): required=True, help="Specify git repo url. eg: https://github.com/XXX/example.git", metavar="", + action="append", default=argparse.SUPPRESS, ) @@ -153,18 +180,21 @@ def add_parser(subparsers, parent_parser): "--neo4j-user", help="Specify neo4j user.", metavar="", + action="append", # default=argparse.SUPPRESS, ) parser.add_argument( "--neo4j-password", help="Specify neo4j password.", metavar="", + action="append", # default=argparse.SUPPRESS, ) parser.add_argument( "--neo4j-uri", help="Specify neo4j uri.eg bolt://localhost:7687", metavar="", + action="append", # default=argparse.SUPPRESS, ) From d185eb6959543914259f12637f018a9feeed4bd9 Mon Sep 17 00:00:00 2001 From: AyeshaSanadi Date: Mon, 20 Jan 2025 16:32:31 +0530 Subject: [PATCH 36/41] Added code for execution uuid, repo pull push command and apply validation for command. --- cmflib/cmf_exception_handling.py | 8 +- cmflib/cmf_merger.py | 9 +- cmflib/cmfquery.py | 22 ++-- cmflib/commands/execution/list.py | 84 ++++++------- cmflib/commands/init/amazonS3.py | 28 ++--- cmflib/commands/init/local.py | 19 ++- cmflib/commands/init/minioS3.py | 28 ++--- cmflib/commands/init/sshremote.py | 25 ++-- cmflib/commands/metadata/pull.py | 34 ++--- cmflib/commands/metadata/push.py | 43 ++++--- cmflib/commands/repo/pull.py | 26 ++-- cmflib/commands/repo/push.py | 131 +++++++++++++++++--- cmflib/dvc_wrapper.py | 28 +++-- cmflib/server_interface/server_interface.py | 8 +- server/app/get_data.py | 24 ++-- server/app/main.py | 2 +- ui/src/components/ExecutionTable/index.jsx | 3 + 17 files changed, 316 insertions(+), 206 deletions(-) diff --git a/cmflib/cmf_exception_handling.py b/cmflib/cmf_exception_handling.py index 5cc80678..12b63013 100644 --- a/cmflib/cmf_exception_handling.py +++ b/cmflib/cmf_exception_handling.py @@ -201,13 +201,13 @@ def handle(self): return f"ERROR: Executions not found." -class ExecutionIDNotFound(CmfFailure): - def __init__(self, exec_id, return_code=105): - self.exec_id = exec_id +class ExecutionUUIDNotFound(CmfFailure): + def __init__(self, exec_uuid, return_code=105): + self.exec_uuid = exec_uuid super().__init__(return_code) def handle(self): - return f"ERROR: Execution id {self.exec_id} is not present in mlmd." + return f"ERROR: Execution uuid {self.exec_uuid} is not present in mlmd." class ArtifactNotFound(CmfFailure): diff --git a/cmflib/cmf_merger.py b/cmflib/cmf_merger.py index 82c6082d..70d6494a 100644 --- a/cmflib/cmf_merger.py +++ b/cmflib/cmf_merger.py @@ -23,7 +23,7 @@ from ml_metadata.proto import metadata_store_pb2 as mlpb from typing import Union -def parse_json_to_mlmd(mlmd_json, path_to_store: str, cmd: str, exec_id: Union[str, int]) -> Union[str, None]: +def parse_json_to_mlmd(mlmd_json, path_to_store: str, cmd: str, exec_uuid: Union[str, str]) -> Union[str, None]: try: mlmd_data = json.loads(mlmd_json) pipelines = mlmd_data["Pipeline"] @@ -52,17 +52,16 @@ def parse_json_to_mlmd(mlmd_json, path_to_store: str, cmd: str, exec_id: Union[s graph=graph, is_server=True) for stage in data["Pipeline"][0]["stages"]: # Iterates over all the stages - if exec_id is None: #if exec_id is None we pass all the executions. + if exec_uuid is None: #if exec_uuid is None we pass all the executions. list_executions = [execution for execution in stage["executions"]] - elif exec_id is not None: # elif exec_id is not None, we pass executions for that specific id. + elif exec_uuid is not None: # elif exec_uuid is not None, we pass executions for that specific uuid. list_executions = [ execution for execution in stage["executions"] - if execution["id"] == int(exec_id) + if exec_uuid in execution['properties']["Execution_uuid"].split(",") ] else: return "Invalid execution id given." - for execution in list_executions: # Iterates over all the executions try: _ = cmf_class.merge_created_context( diff --git a/cmflib/cmfquery.py b/cmflib/cmfquery.py index 47d2ac9f..6a40895a 100644 --- a/cmflib/cmfquery.py +++ b/cmflib/cmfquery.py @@ -239,18 +239,23 @@ def _get_stages(self, pipeline_id: int) -> t.List[mlpb.Context]: """ return self.store.get_children_contexts_by_context(pipeline_id) - def _get_executions(self, stage_id: int, execution_id: t.Optional[int] = None) -> t.List[mlpb.Execution]: + def _get_executions(self, stage_id: int, execution_uuid: t.Optional[str] = None) -> t.List[mlpb.Execution]: """Return executions of the given stage. Args: stage_id: Stage identifier. - execution_id: If not None, return only execution with this ID. + execution_uuid: If not None, return only execution with this uuid. Returns: List of executions matching input parameters. """ executions: t.List[mlpb.Execution] = self.store.get_executions_by_context(stage_id) - if execution_id is not None: - executions = [execution for execution in executions if execution.id == execution_id] + if execution_uuid is not None: + executions_list = executions + executions = [] + for execution in executions_list: + exec_uuid_list = execution.properties['Execution_uuid'].string_value.split(",") + if execution_uuid in exec_uuid_list: + executions.append(execution) return executions def _get_executions_by_input_artifact_id(self, artifact_id: int,pipeline_id: str = None) -> t.List[int]: @@ -889,17 +894,14 @@ def get_one_hop_parent_artifacts_with_id(self, artifact_id: int) -> pd.DataFrame ) return df - def dumptojson(self, pipeline_name: str, exec_id: t.Optional[int] = None) -> t.Optional[str]: + def dumptojson(self, pipeline_name: str, exec_uuid: t.Optional[str] = None) -> t.Optional[str]: """Return JSON-parsable string containing details about the given pipeline. Args: pipeline_name: Name of an AI pipelines. - exec_id: Optional stage execution ID - filter stages by this execution ID. + exec_uuid: Optional stage execution_uuid - filter stages by this execution_uuid. Returns: Pipeline in JSON format. """ - if exec_id is not None: - exec_id = int(exec_id) - def _get_node_attributes(_node: t.Union[mlpb.Context, mlpb.Execution, mlpb.Event], _attrs: t.Dict) -> t.Dict: for attr in CONTEXT_LIST: #Artifacts getattr call on Type was giving empty string, which was overwriting @@ -921,7 +923,7 @@ def _get_node_attributes(_node: t.Union[mlpb.Context, mlpb.Execution, mlpb.Event pipeline_attrs = _get_node_attributes(pipeline, {"stages": []}) for stage in self._get_stages(pipeline.id): stage_attrs = _get_node_attributes(stage, {"executions": []}) - for execution in self._get_executions(stage.id, execution_id=exec_id): + for execution in self._get_executions(stage.id, execution_uuid=exec_uuid): # name will be an empty string for executions that are created with # create new execution as true(default) # In other words name property will there only for execution diff --git a/cmflib/commands/execution/list.py b/cmflib/commands/execution/list.py index 4771c777..8ea705f4 100644 --- a/cmflib/commands/execution/list.py +++ b/cmflib/commands/execution/list.py @@ -28,7 +28,7 @@ DuplicateArgumentNotAllowed, MissingArgument, MsgSuccess, - ExecutionsNotFound + ExecutionUUIDNotFound ) class CmdExecutionList(CmdBase): @@ -119,47 +119,45 @@ def run(self): df = df.drop(['Python_Env'], axis=1) # Type of df is series of integers. # Process execution ID if provided - if not self.args.execution_id: # If self.args.execution_id is None or an empty list ([]). + if not self.args.execution_uuid: # If self.args.execution_uuid is None or an empty list ([]). pass - elif len(self.args.execution_id) > 1: # If the user provided more than one execution_id. - raise DuplicateArgumentNotAllowed("execution_id", "-e") - elif not self.args.execution_id[0]: # self.args.execution_id[0] is an empty string (""). - raise MissingArgument("execution id") + elif len(self.args.execution_uuid) > 1: # If the user provided more than one execution_uuid. + raise DuplicateArgumentNotAllowed("execution_uuid", "-e") + elif not self.args.execution_uuid[0]: # self.args.execution_uuid[0] is an empty string (""). + raise MissingArgument("execution uuid") else: - if self.args.execution_id[0].isdigit(): - if int(self.args.execution_id[0]) in list(df['id']): # Converting series to list. - df = df.query(f'id == {int(self.args.execution_id[0])}') # Used dataframe based on execution id - - # Rearranging columns: Start with fixed columns and appending the remaining columns. - updated_columns = ["id", "Context_Type", "Execution", "Execution_uuid", "name", "Pipeline_Type", "Git_Repo"] - updated_columns += [ col for col in df.columns if col not in updated_columns] - - df = df[updated_columns] - - # Drop columns that start with 'custom_properties_' and that contains NaN values - columns_to_drop = [col for col in df.columns if col.startswith('custom_properties_') and df[col].isna().any()] - df = df.drop(columns=columns_to_drop) - - # Wrap text in object-type columns to a width of 30 characters. - for col in df.select_dtypes(include=['object']).columns: - df[col] = df[col].apply(lambda x: textwrap.fill(x, width=30) if isinstance(x, str) else x) - - # Set 'id' as the DataFrame index and transpose it for display horizontally. - df.set_index("id", inplace=True) - df = df.T.reset_index() - df.columns.values[0] = 'id' # Rename the first column back to 'id'. - - # Display the updated DataFrame as a formatted table. - table = tabulate( - df, - headers=df.columns, - tablefmt="grid", - showindex=False, - ) - print(table) - print() - return MsgSuccess(msg_str = "Done.") - raise ExecutionsNotFound(self.args.execution_id[0]) + df = df[df['Execution_uuid'].apply(lambda x: self.args.execution_uuid[0] in x.split(","))] # Used dataframe based on execution uuid + if not df.empty: + # Rearranging columns: Start with fixed columns and appending the remaining columns. + updated_columns = ["id", "Context_Type", "Execution", "Execution_uuid", "name", "Pipeline_Type", "Git_Repo"] + updated_columns += [ col for col in df.columns if col not in updated_columns] + + df = df[updated_columns] + + # Drop columns that start with 'custom_properties_' and that contains NaN values + columns_to_drop = [col for col in df.columns if col.startswith('custom_properties_') and df[col].isna().any()] + df = df.drop(columns=columns_to_drop) + + # Wrap text in object-type columns to a width of 30 characters. + for col in df.select_dtypes(include=['object']).columns: + df[col] = df[col].apply(lambda x: textwrap.fill(x, width=30) if isinstance(x, str) else x) + + # Set 'id' as the DataFrame index and transpose it for display horizontally. + df.set_index("id", inplace=True) + df = df.T.reset_index() + df.columns.values[0] = 'id' # Rename the first column back to 'id'. + + # Display the updated DataFrame as a formatted table. + table = tabulate( + df, + headers=df.columns, + tablefmt="grid", + showindex=False, + ) + print(table) + print() + return MsgSuccess(msg_str = "Done.") + return ExecutionUUIDNotFound(self.args.execution_uuid[0]) self.display_table(df) return MsgSuccess(msg_str = "Done.") @@ -197,10 +195,10 @@ def add_parser(subparsers, parent_parser): parser.add_argument( "-e", - "--execution_id", + "--execution_uuid", action="append", - help="Specify the execution id to retrieve execution.", - metavar="", + help="Specify the execution uuid to retrieve execution.", + metavar="", ) parser.set_defaults(func=CmdExecutionList) \ No newline at end of file diff --git a/cmflib/commands/init/amazonS3.py b/cmflib/commands/init/amazonS3.py index b5abb1a5..69e4e2af 100644 --- a/cmflib/commands/init/amazonS3.py +++ b/cmflib/commands/init/amazonS3.py @@ -40,21 +40,17 @@ def run(self): cmf_config = os.environ.get("CONFIG_FILE", ".cmfconfig") required_args = { - "url": self.args.url, - "access-key-id": self.args.access_key_id, - "secret-key": self.args.secret_key, - "git-remote-url": self.args.git_remote_url, - "session-token" : self.args.session_token, - "cmf-server-url" : self.args.cmf_server_url, - "neo4j-user" : self.args.neo4j_user, - "neo4j-password" : self.args.neo4j_password, - "neo4j_uri" : self.args.neo4j_uri - + "url": self.args.url, + "access-key-id": self.args.access_key_id, + "secret-key": self.args.secret_key, + "git-remote-url": self.args.git_remote_url, + "session-token" : self.args.session_token, + "neo4j-user" : self.args.neo4j_user, + "neo4j-password" : self.args.neo4j_password, + "neo4j_uri" : self.args.neo4j_uri } for arg_name, arg_value in required_args.items(): if arg_value: - if arg_name == "cmf-server-url" and len(arg_value) > 2: - raise DuplicateArgumentNotAllowed(arg_name,("--"+arg_name)) if arg_value[0] == "": raise MissingArgument(arg_name) elif len(arg_value) > 1: @@ -78,10 +74,10 @@ def run(self): attr_dict["password"] = self.args.neo4j_password[0] attr_dict["uri"] = self.args.neo4j_uri[0] CmfConfig.write_config(cmf_config, "neo4j", attr_dict, True) - elif ( - not self.args.neo4j_user[0] - and not self.args.neo4j_password[0] - and not self.args.neo4j_uri[0] + elif( + not self.args.neo4j_user + and not self.args.neo4j_password + and not self.args.neo4j_uri ): pass else: diff --git a/cmflib/commands/init/local.py b/cmflib/commands/init/local.py index 201c4eae..718e08b9 100644 --- a/cmflib/commands/init/local.py +++ b/cmflib/commands/init/local.py @@ -38,18 +38,15 @@ def run(self): cmf_config = os.environ.get("CONFIG_FILE", ".cmfconfig") required_args = { - "path": self.args.path, - "git-remote-url": self.args.git_remote_url, - "cmf-server-url" : self.args.cmf_server_url, - "neo4j-user" : self.args.neo4j_user, - "neo4j-password" : self.args.neo4j_password, - "neo4j_uri" : self.args.neo4j_uri + "path": self.args.path, + "git-remote-url": self.args.git_remote_url, + "neo4j-user" : self.args.neo4j_user, + "neo4j-password" : self.args.neo4j_password, + "neo4j_uri" : self.args.neo4j_uri } for arg_name, arg_value in required_args.items(): if arg_value: - if arg_name == "cmf-server-url" and len(arg_value) > 2: - raise DuplicateArgumentNotAllowed(arg_name,("--"+arg_name)) if arg_value[0] == "": raise MissingArgument(arg_name) elif len(arg_value) > 1: @@ -76,9 +73,9 @@ def run(self): attr_dict["uri"] = self.args.neo4j_uri[0] CmfConfig.write_config(cmf_config, "neo4j", attr_dict, True) elif ( - not self.args.neo4j_user[0] - and not self.args.neo4j_password[0] - and not self.args.neo4j_uri[0] + not self.args.neo4j_user + and not self.args.neo4j_password + and not self.args.neo4j_uri ): pass else: diff --git a/cmflib/commands/init/minioS3.py b/cmflib/commands/init/minioS3.py index 197e709a..f0b80db7 100644 --- a/cmflib/commands/init/minioS3.py +++ b/cmflib/commands/init/minioS3.py @@ -40,21 +40,18 @@ def run(self): cmf_config = os.environ.get("CONFIG_FILE", ".cmfconfig") required_args = { - "url": self.args.url, - "endpoint-url": self.args.endpoint_url, - "access-key-id": self.args.access_key_id, - "secret-key": self.args.secret_key, - "git-remote-url": self.args.git_remote_url, - "cmf-server-url" : self.args.cmf_server_url, - "neo4j-user" : self.args.neo4j_user, - "neo4j-password" : self.args.neo4j_password, - "neo4j_uri" : self.args.neo4j_uri + "url": self.args.url, + "endpoint-url": self.args.endpoint_url, + "access-key-id": self.args.access_key_id, + "secret-key": self.args.secret_key, + "git-remote-url": self.args.git_remote_url, + "neo4j-user" : self.args.neo4j_user, + "neo4j-password" : self.args.neo4j_password, + "neo4j_uri" : self.args.neo4j_uri } for arg_name, arg_value in required_args.items(): if arg_value: - if arg_name == "cmf-server-url" and len(arg_value) > 2: - raise DuplicateArgumentNotAllowed(arg_name,("--"+arg_name)) if arg_value[0] == "": raise MissingArgument(arg_name) elif len(arg_value) > 1: @@ -81,9 +78,9 @@ def run(self): attr_dict["uri"] = self.args.neo4j_uri[0] CmfConfig.write_config(cmf_config, "neo4j", attr_dict, True) elif ( - not self.args.neo4j_user[0] - and not self.args.neo4j_password[0] - and not self.args.neo4j_uri[0] + not self.args.neo4j_user + and not self.args.neo4j_password + and not self.args.neo4j_uri ): pass else: @@ -105,7 +102,7 @@ def run(self): print("Starting cmf init.") dvc_quiet_init() repo_type = "minio" - output = dvc_add_remote_repo(repo_type, self.args.url) + output = dvc_add_remote_repo(repo_type, self.args.url[0]) if not output: raise CmfInitFailed print(output) @@ -116,7 +113,6 @@ def run(self): return status - def add_parser(subparsers, parent_parser): HELP = "Initialises Minio S3 bucket as artifact repository." diff --git a/cmflib/commands/init/sshremote.py b/cmflib/commands/init/sshremote.py index 25f0428c..a85badae 100644 --- a/cmflib/commands/init/sshremote.py +++ b/cmflib/commands/init/sshremote.py @@ -39,21 +39,18 @@ def run(self): cmf_config = os.environ.get("CONFIG_FILE", ".cmfconfig") required_args = { - "path": self.args.path, - "user": self.args.user, - "port": self.args.port, - "password": self.args.password, - "git-remote-url": self.args.git_remote_url, - "cmf-server-url" : self.args.cmf_server_url, - "neo4j-user" : self.args.neo4j_user, - "neo4j-password" : self.args.neo4j_password, - "neo4j_uri" : self.args.neo4j_uri + "path": self.args.path, + "user": self.args.user, + "port": self.args.port, + "password": self.args.password, + "git-remote-url": self.args.git_remote_url, + "neo4j-user" : self.args.neo4j_user, + "neo4j-password" : self.args.neo4j_password, + "neo4j_uri" : self.args.neo4j_uri } for arg_name, arg_value in required_args.items(): if arg_value: - if arg_name == "cmf-server-url" and len(arg_value) > 2: - raise DuplicateArgumentNotAllowed(arg_name,("--"+arg_name)) if arg_value[0] == "": raise MissingArgument(arg_name) elif len(arg_value) > 1: @@ -80,9 +77,9 @@ def run(self): attr_dict["uri"] = self.args.neo4j_uri[0] CmfConfig.write_config(cmf_config, "neo4j", attr_dict, True) elif ( - not self.args.neo4j_user[0] - and not self.args.neo4j_password[0] - and not self.args.neo4j_uri[0] + not self.args.neo4j_user + and not self.args.neo4j_password + and not self.args.neo4j_uri ): pass else: diff --git a/cmflib/commands/metadata/pull.py b/cmflib/commands/metadata/pull.py index 417e0b37..0cec9508 100644 --- a/cmflib/commands/metadata/pull.py +++ b/cmflib/commands/metadata/pull.py @@ -26,7 +26,8 @@ DuplicateArgumentNotAllowed, PipelineNotFound, MissingArgument, - CmfNotConfigured, ExecutionIDNotFound, + CmfNotConfigured, + ExecutionUUIDNotFound, MlmdNotFoundOnServer, MlmdFilePullSuccess, CmfServerNotAvailable, @@ -53,17 +54,19 @@ def run(self): full_path_to_dump = "" cmd = "pull" status = 0 - exec_id = None + exec_uuid = None if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: raise DuplicateArgumentNotAllowed("pipeline_name", "-p") elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). raise MissingArgument("pipeline name") - if not self.args.execution: # If self.args.execution[0] is None or an empty list ([]). - pass - elif len(self.args.execution) > 1: # If the user provided more than one execution id. - raise DuplicateArgumentNotAllowed("execution id", "-e") - elif not self.args.execution[0]: # self.args.execution[0] is an empty string (""). - raise MissingArgument("execution id") + + if not self.args.execution_uuid: # If self.args.execution_uuid[0] is None or an empty list ([]). + pass + elif len(self.args.execution_uuid) > 1: # If the user provided more than one execution_uuid. + raise DuplicateArgumentNotAllowed("execution_uuid", "-e") + elif not self.args.execution_uuid[0]: # self.args.execution_uuid[0] is an empty string (""). + raise MissingArgument("execution_uuid") + if self.args.file_name: # setting directory where mlmd file will be dumped if len(self.args.file_name) > 1: # If the user provided more than one file name. raise DuplicateArgumentNotAllowed("file_name", "-f") @@ -81,23 +84,24 @@ def run(self): raise FileNameNotfound else: full_path_to_dump = os.getcwd() + "/mlmd" - if self.args.execution: - exec_id = self.args.execution[0] + + if self.args.execution_uuid: + exec_uuid = self.args.execution_uuid[0] output = server_interface.call_mlmd_pull( - url, self.args.pipeline_name[0], exec_id + url, self.args.pipeline_name[0], exec_uuid ) # calls cmf-server api to get mlmd file data(Json format) status = output.status_code # checks If given pipeline does not exists/ elif pull mlmd file/ else mlmd file is not available if output.content.decode() == None: raise PipelineNotFound(self.args.pipeline_name[0]) - elif output.content.decode() == "no_exec_id": - raise ExecutionIDNotFound(exec_id) + elif output.content.decode() == "no_exec_uuid": + raise ExecutionUUIDNotFound(exec_uuid) elif output.content: if status == 200: try: cmf_merger.parse_json_to_mlmd( - output.content, full_path_to_dump, cmd, None + output.content, full_path_to_dump, cmd, exec_uuid ) # converts mlmd json data to mlmd file pull_status = MlmdFilePullSuccess(full_path_to_dump) return pull_status @@ -144,7 +148,7 @@ def add_parser(subparsers, parent_parser): ) parser.add_argument( - "-e", "--execution", action="append", help="Specify Execution id", metavar="" + "-e", "--execution_uuid", action="append", help="Specify execution_uuid", metavar="" ) parser.set_defaults(func=CmdMetadataPull) diff --git a/cmflib/commands/metadata/push.py b/cmflib/commands/metadata/push.py index f1c07d8e..aff10260 100644 --- a/cmflib/commands/metadata/push.py +++ b/cmflib/commands/metadata/push.py @@ -29,7 +29,7 @@ MlmdFilePushSuccess, ExecutionsAlreadyExists, FileNotFound, - ExecutionIDNotFound, + ExecutionUUIDNotFound, PipelineNotFound, UpdateCmfVersion, CmfServerNotAvailable, @@ -91,32 +91,35 @@ def run(self): print("........................................") # converts mlmd file to json format json_payload = query.dumptojson(pipeline_name, None) - + # checks if execution is given by user - if not self.args.execution: # If self.args.execution is None or an empty list ([]). - exec_id = None - response = server_interface.call_mlmd_push(json_payload, url, exec_id, pipeline_name) - elif len(self.args.execution) > 1: # If the user provided more than one execution. - raise DuplicateArgumentNotAllowed("execution", "-e") - elif not self.args.execution[0]: # self.args.execution[0] is an empty string (""). - raise MissingArgument("execution id") - elif not self.args.execution[0].isdigit(): - raise ExecutionIDNotFound(self.args.execution[0]) + if not self.args.execution_uuid: # If self.args.execution_uuid is None or an empty list ([]). + exec_uuid = None + response = server_interface.call_mlmd_push(json_payload, url, exec_uuid, pipeline_name) + elif len(self.args.execution_uuid) > 1: # If the user provided more than one execution. + raise DuplicateArgumentNotAllowed("execution_uuid", "-e") + elif not self.args.execution_uuid[0]: # self.args.execution_uuid[0] is an empty string (""). + raise MissingArgument("execution_uuid") else: - exec_id = int(self.args.execution[0]) + exec_uuid = self.args.execution_uuid[0] mlmd_data = json.loads(json_payload)["Pipeline"] # checks if given execution present in mlmd for i in mlmd_data[0]["stages"]: for j in i["executions"]: - if j["id"] == int(exec_id): + # created exec_uuid of list if multiple uuid present for single execution. + # for eg: f9da581c-d16c-11ef-9809-9350156ed1ac,32f17f4a-d16d-11ef-9809-9350156ed1ac + uuid_list = j['properties']['Execution_uuid'].split(",") + # check if user specified exec_uuid exists inside local mlmd + if exec_uuid in uuid_list: execution_flag = 1 - # calling mlmd_push api to push mlmd file to cmf-server + # calling mlmd_push api to push mlmd_data = json.loads(json_payload)["Pipeline"] + # checks if given execution present in mlmdmlmd file to cmf-server response = server_interface.call_mlmd_push( - json_payload, url, exec_id, pipeline_name + json_payload, url, exec_uuid, pipeline_name ) break if execution_flag == 0: - raise ExecutionIDNotFound(exec_id) + raise ExecutionUUIDNotFound(exec_uuid) status_code = response.status_code if status_code == 200: output = "" @@ -127,7 +130,6 @@ def run(self): if response.json()["status"]=="exists": display_output = "Executions already exists." output = ExecutionsAlreadyExists() - if not self.args.tensorboard: return output elif len(self.args.tensorboard) > 1: # If the user provided more than one tensorboard name. @@ -141,6 +143,7 @@ def run(self): print("tensorboard logs upload started!!") print("......................................") + tensorboard = self.args.tensorboard[0] # check if the path provided is for a file if os.path.isfile(tensorboard): @@ -212,10 +215,10 @@ def add_parser(subparsers, parent_parser): parser.add_argument( "-e", - "--execution", + "--execution_uuid", action="append", - help="Specify Execution id.", - metavar="", + help="Specify Execution uuid.", + metavar="", ) parser.add_argument( diff --git a/cmflib/commands/repo/pull.py b/cmflib/commands/repo/pull.py index a129cd01..f5e75f26 100644 --- a/cmflib/commands/repo/pull.py +++ b/cmflib/commands/repo/pull.py @@ -19,7 +19,7 @@ import requests from cmflib.cli.command import CmdBase -from cmflib.dvc_wrapper import git_get_repo, git_get_pull +from cmflib.dvc_wrapper import git_get_repo, git_get_pull, git_get_branch from cmflib.commands.artifact.pull import CmdArtifactPull from cmflib.commands.metadata.pull import CmdMetadataPull from cmflib.cmf_exception_handling import MsgSuccess, MsgFailure @@ -49,18 +49,19 @@ def git_pull(self): # Getting github url from cmf init command url = git_get_repo() # Example url = https://github.com/ABC/my-repo - url = url.split("/") + splited_url = url.split("/") + branch_name = git_get_branch()[0] # Check whether branch exists in git repo or not # url[-2] = ABC, url-1] = my-repo - if self.branch_exists(url[-2], url[-1], "mlmd"): + if self.branch_exists(splited_url[-2], splited_url[-1], branch_name): # pull the code from mlmd branch print("git pull started...") - stdout, stderr, returncode = git_get_pull() + stdout, stderr, returncode = git_get_pull(branch_name) if returncode != 0: - raise MsgFailure(msg_str=f"Error pulling changes: {stderr}") + raise MsgFailure(msg_str=f"{stderr}") return MsgSuccess(msg_str=stdout) else: - raise MsgFailure(msg_str="Branch 'mlmd' does not exists!!") + raise MsgFailure(msg_str=f"{branch_name} inside {url} does not exists!!") def run(self): print("metadata pull started...") @@ -104,17 +105,20 @@ def add_parser(subparsers, parent_parser): parser.add_argument( "-e", - "--execution", + "--execution_uuid", action="append", - help="Specify Execution id.", - metavar="", + help="Specify Execution uuid.", + metavar="", ) + # The 'artifact_name' parameter is used inside 'cmf artifact pull' command. + # To avoid errors, it is defined here with a default value of 'None' and hidden from the help text using 'argparse.SUPPRESS'. parser.add_argument( "-a", "--artifact_name", - action="append", - help="Specify artifact name.", + action="store_const", + const="None", + help=argparse.SUPPRESS, metavar="", ) diff --git a/cmflib/commands/repo/push.py b/cmflib/commands/repo/push.py index 918c075c..92df8a8e 100644 --- a/cmflib/commands/repo/push.py +++ b/cmflib/commands/repo/push.py @@ -17,12 +17,26 @@ #!/usr/bin/env python3 import argparse import requests - +import os +import re + +from cmflib import cmfquery +from cmflib.cli.utils import check_minio_server, find_root +from cmflib.utils.helper_functions import generate_osdf_token +from cmflib.utils.dvc_config import DvcConfig +from cmflib.dvc_wrapper import dvc_add_attribute +from cmflib.utils.cmf_config import CmfConfig from cmflib.cli.command import CmdBase -from cmflib.dvc_wrapper import git_get_repo, git_get_pull, git_get_push +from cmflib.dvc_wrapper import git_get_repo, git_get_pull, git_get_push, git_get_branch, dvc_push from cmflib.commands.artifact.push import CmdArtifactPush from cmflib.commands.metadata.push import CmdMetadataPush -from cmflib.cmf_exception_handling import MsgSuccess, MsgFailure +from cmflib.cmf_exception_handling import ( + MsgSuccess, + MsgFailure, + ArtifactPushSuccess, + Minios3ServerInactive, + CmfNotConfigured, + FileNotFound,) class CmdRepoPush(CmdBase): @@ -50,32 +64,111 @@ def git_push(self): url = git_get_repo() # Example url = https://github.com/ABC/my-repo url = url.split("/") + branch_name = git_get_branch()[0] # Check whether branch exists in git repo or not # url[-2] = ABC, url-1] = my-repo - if self.branch_exists(url[-2], url[-1], "mlmd"): + if self.branch_exists(url[-2], url[-1], branch_name): # 1. pull the code from mlmd branch # 2. push the code inside mlmd branch - stdout, stderr, returncode = git_get_pull() + stdout, stderr, returncode = git_get_pull(branch_name) if returncode != 0: - raise MsgFailure(msg_str=f"Error pulling changes: {stderr}") + raise MsgFailure(msg_str=f"{stderr}") print(stdout) # push the code inside mlmd branch - stdout, stderr, returncode = git_get_push() + stdout, stderr, returncode = git_get_push(branch_name) if returncode != 0: - raise MsgFailure(msg_str=f"Error pushing changes: {stderr}") + raise MsgFailure(msg_str=f"{stderr}") return MsgSuccess(msg_str="Successfully pushed and pulled changes!") + + def artifact_push(self): + result = "" + dvc_config_op = DvcConfig.get_dvc_config() + cmf_config_file = os.environ.get("CONFIG_FILE", ".cmfconfig") + + # find root_dir of .cmfconfig + output = find_root(cmf_config_file) + + # in case, there is no .cmfconfig file + if output.find("'cmf' is not configured.") != -1: + raise CmfNotConfigured(output) + + out_msg = check_minio_server(dvc_config_op) + if dvc_config_op["core.remote"] == "minio" and out_msg != "SUCCESS": + raise Minios3ServerInactive() + if dvc_config_op["core.remote"] == "osdf": + config_file_path = os.path.join(output, cmf_config_file) + cmf_config={} + cmf_config=CmfConfig.read_config(config_file_path) + #print("key_id="+cmf_config["osdf-key_id"]) + dynamic_password = generate_osdf_token(cmf_config["osdf-key_id"],cmf_config["osdf-key_path"],cmf_config["osdf-key_issuer"]) + #print("Dynamic Password"+dynamic_password) + dvc_add_attribute(dvc_config_op["core.remote"],"password",dynamic_password) + #The Push URL will be something like: https:///files/md5/[First Two of MD5 Hash] + result = dvc_push() + return result + + current_directory = os.getcwd() + if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). + mlmd_file_name = "./mlmd" # Default path for mlmd file name. + else: + mlmd_file_name = self.args.file_name[0] + if mlmd_file_name == "mlmd": + mlmd_file_name = "./mlmd" + current_directory = os.path.dirname(mlmd_file_name) + if not os.path.exists(mlmd_file_name): #checking if MLMD files exists + raise FileNotFound(mlmd_file_name, current_directory) + + # creating cmfquery object + query = cmfquery.CmfQuery(mlmd_file_name) + names = [] + df = query.get_all_executions_in_pipeline(self.args.pipeline_name[0]) + # fetching execution id from df based on execution_uuid + exec_id_df = df[df['Execution_uuid'].apply(lambda x: self.args.execution_uuid[0] in x.split(","))]['id'] + exec_id = int (exec_id_df.iloc[0]) + + artifacts = query.get_all_artifacts_for_execution(exec_id) # getting all artifacts based on execution id + # dropping artifact with type 'metrics' as metrics doesn't have physical file + if not artifacts.empty: + artifacts = artifacts[artifacts['type'] != 'Metrics'] + # adding .dvc at the end of every file as it is needed for pull + artifacts['name'] = artifacts['name'].apply(lambda name: f"{name.split(':')[0]}.dvc") + names.extend(artifacts['name'].tolist()) + + final_list = [] + for file in set(names): + # checking if the .dvc exists + if os.path.exists(file): + final_list.append(file) + # checking if the .dvc exists in user's project working directory + elif os.path.isabs(file): + file = re.split("/",file)[-1] + file = os.path.join(os.getcwd(), file) + if os.path.exists(file): + final_list.append(file) + else: + # not adding the .dvc to the final list in case .dvc doesn't exists in both the places + pass + result = dvc_push(list(final_list)) + return ArtifactPushSuccess(result) def run(self): - print("Executing cmf artifact push command..") - artifact_push_instance = CmdArtifactPush(self.args) - if artifact_push_instance.run().status == "success": - print("Executing cmf metadata push command..") - metadata_push_instance = CmdMetadataPush(self.args) - if metadata_push_instance.run().status == "success": - print("Execution git push command..") + print("Executing cmf metadata push command..") + metadata_push_instance = CmdMetadataPush(self.args) + if metadata_push_instance.run().status == "success": + print("Executing cmf artifact push command..") + if(self.args.execution_uuid): + # If an execution uuid exists, push the artifacts associated with that execution. + artifact_push_result = self.artifact_push() + else: + # Pushing all artifacts. + artifact_push_instance = CmdArtifactPush(self.args) + artifact_push_result = artifact_push_instance.run() + + if artifact_push_result.status == "success": + print("Executing git push command..") return self.git_push() - + def add_parser(subparsers, parent_parser): PUSH_HELP = "Push artifacts, metadata files, and source code to the user's artifact repository, cmf-server, and git respectively." @@ -109,11 +202,11 @@ def add_parser(subparsers, parent_parser): parser.add_argument( "-e", - "--execution", + "--execution_uuid", action="append", - help="Specify Execution id.", + help="Specify Execution uuid.", default=None, - metavar="", + metavar="", ) parser.add_argument( diff --git a/cmflib/dvc_wrapper.py b/cmflib/dvc_wrapper.py index caf2661c..26239362 100644 --- a/cmflib/dvc_wrapper.py +++ b/cmflib/dvc_wrapper.py @@ -495,9 +495,9 @@ def git_modify_remote_url(git_url) -> str: print(f"Unexpected {errs}") return commit -# Pulling code from mlmd branch -def git_get_pull() -> str: - process = subprocess.Popen('git pull cmf_origin mlmd', +# Pulling code from branch +def git_get_pull(branch_name: str) -> str: + process = subprocess.Popen(f'git pull cmf_origin {branch_name}', cwd=None, shell=True, stdout=subprocess.PIPE, @@ -509,9 +509,9 @@ def git_get_pull() -> str: process.returncode ) -# Pusing code inside mlmd branch -def git_get_push() -> str: - process = subprocess.Popen('git push -u cmf_origin mlmd', +# Pusing code inside branch +def git_get_push(branch_name: str) -> str: + process = subprocess.Popen(f'git push -u cmf_origin {branch_name}', cwd=None, shell=True, stdout=subprocess.PIPE, @@ -521,4 +521,18 @@ def git_get_push() -> str: stdout.decode('utf-8').strip() if stdout else '', stderr.decode('utf-8').strip() if stderr else '', process.returncode - ) \ No newline at end of file + ) + +# Getting current branch +def git_get_branch() -> tuple: + process = subprocess.Popen('git branch --show-current', + cwd=None, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout, stderr = process.communicate() + return ( + stdout.decode('utf-8').strip() if stdout else '', + stderr.decode('utf-8').strip() if stderr else '', + process.returncode + ) diff --git a/cmflib/server_interface/server_interface.py b/cmflib/server_interface/server_interface.py index 3b3730c6..fecf1116 100644 --- a/cmflib/server_interface/server_interface.py +++ b/cmflib/server_interface/server_interface.py @@ -18,18 +18,18 @@ import json # This function posts mlmd data to mlmd_push api on cmf-server -def call_mlmd_push(json_payload, url, exec_id, pipeline_name): +def call_mlmd_push(json_payload, url, exec_uuid, pipeline_name): url_to_pass = f"{url}/mlmd_push" - json_data = {"id": exec_id, "json_payload": json_payload, "pipeline_name": pipeline_name} + json_data = {"exec_uuid": exec_uuid, "json_payload": json_payload, "pipeline_name": pipeline_name} response = requests.post(url_to_pass, json=json_data) # Post request # print("Status code -", response.status_code) return response # This function gets mlmd data from mlmd_pull api from cmf-server -def call_mlmd_pull(url, pipeline_name, exec_id): +def call_mlmd_pull(url, pipeline_name, exec_uuid): url_to_pass = f"{url}/mlmd_pull/{pipeline_name}" - response = requests.get(url_to_pass, json={"exec_id": exec_id}) # Get request + response = requests.get(url_to_pass, json={"exec_uuid": exec_uuid}) # Get request return response diff --git a/server/app/get_data.py b/server/app/get_data.py index 20e7d649..9613bd12 100644 --- a/server/app/get_data.py +++ b/server/app/get_data.py @@ -261,31 +261,35 @@ def create_unique_executions(server_store_path, req_info) -> str: for i in mlmd_data["Pipeline"]: i['stages']=[stage for stage in i['stages'] if stage['executions']!=[]] + for i in mlmd_data["Pipeline"]: - if len(i['stages']) == 0 : status="exists" else: cmf_merger.parse_json_to_mlmd( - json.dumps(mlmd_data), "/cmf-server/data/mlmd", "push", req_info["id"] + json.dumps(mlmd_data), "/cmf-server/data/mlmd", "push", req_info["exec_uuid"] ) status='success' return status -def get_mlmd_from_server(server_store_path: str, pipeline_name: str, exec_id: str): +def get_mlmd_from_server(server_store_path: str, pipeline_name: str, exec_uuid: str, dict_of_exe_ids: dict): query = cmfquery.CmfQuery(server_store_path) json_payload = None - df = pd.DataFrame() + flag=0 if(query.get_pipeline_id(pipeline_name)!=-1): # checks if pipeline name is available in mlmd - if exec_id != None: - exec_id = int(exec_id) - df = query.get_all_executions_by_ids_list([exec_id]) - if df.empty: - json_payload = "no_exec_id" + if exec_uuid != None: + dict_of_exe_ids = dict_of_exe_ids[pipeline_name] + for index, row in dict_of_exe_ids.iterrows(): + exec_uuid_list = row['Execution_uuid'].split(",") + if exec_uuid in exec_uuid_list: + flag=1 + break + if not flag: + json_payload = "no_exec_uuid" return json_payload - json_payload = query.dumptojson(pipeline_name, exec_id) + json_payload = query.dumptojson(pipeline_name, exec_uuid) return json_payload def get_lineage_data(server_store_path,pipeline_name,type,dict_of_art_ids,dict_of_exe_ids): diff --git a/server/app/main.py b/server/app/main.py index 9fe72a03..8e04a09d 100644 --- a/server/app/main.py +++ b/server/app/main.py @@ -123,7 +123,7 @@ async def mlmd_pull(info: Request, pipeline_name: str): req_info = await info.json() if os.path.exists(server_store_path): #json_payload values can be json data, NULL or no_exec_id. - json_payload= await async_api(get_mlmd_from_server, server_store_path, pipeline_name, req_info['exec_id']) + json_payload= await async_api(get_mlmd_from_server, server_store_path, pipeline_name, req_info['exec_uuid'], dict_of_exe_ids) else: raise HTTPException(status_code=413, detail=f"mlmd file not available on cmf-server.") if json_payload == None: diff --git a/ui/src/components/ExecutionTable/index.jsx b/ui/src/components/ExecutionTable/index.jsx index 0f86bdad..bbe2687f 100644 --- a/ui/src/components/ExecutionTable/index.jsx +++ b/ui/src/components/ExecutionTable/index.jsx @@ -27,6 +27,7 @@ const ExecutionTable = ({ executions, onSort, onFilter }) => { const [expandedRow, setExpandedRow] = useState(null); const consistentColumns = []; + console.log("executions",executions); useEffect(() => { // Set initial sorting order when component mounts @@ -156,6 +157,7 @@ const ExecutionTable = ({ executions, onSort, onFilter }) => { + Execution uuid { {expandedRow === index ? "-" : "+"} + {data.Execution_uuid} {data.Context_Type} {data.Execution} {data.Git_Repo} From 83a7588be39268ab1479754699b5f33d1f2183fc Mon Sep 17 00:00:00 2001 From: AyeshaSanadi <167299982+AyeshaSanadi@users.noreply.github.com> Date: Mon, 20 Jan 2025 16:47:54 +0530 Subject: [PATCH 37/41] Replaced execution id to execution uuid --- docs/cmf_client/cmf_client.md | 63 +++++++++++++++++------------------ 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/docs/cmf_client/cmf_client.md b/docs/cmf_client/cmf_client.md index a8e8cbf9..258af6c2 100644 --- a/docs/cmf_client/cmf_client.md +++ b/docs/cmf_client/cmf_client.md @@ -284,11 +284,11 @@ Usage: cmf metadata [-h] {pull,push,export} `cmf metadata` push, pull or export the metadata file to and from the cmf-server, respectively. ### cmf metadata pull ``` -Usage: cmf metadata pull [-h] -p [pipeline_name] -f [file_name] -e [exec_id] +Usage: cmf metadata pull [-h] -p [pipeline_name] -f [file_name] -e [exec_uuid] ``` `cmf metadata pull` command pulls the metadata file from the cmf-server to the user's local machine. ``` -cmf metadata pull -p 'pipeline-name' -f '/path/to/mlmd-file-name' -e 'execution_id' +cmf metadata pull -p 'pipeline-name' -f '/path/to/mlmd-file-name' -e 'execution_uuid' ``` Required Arguments ``` @@ -296,17 +296,17 @@ Required Arguments ``` Optional Arguments ``` --h, --help show this help message and exit. --e [exec_id], --execution [exec_id] Specify execution id. --f [file_name], --file_name [file_name] Specify mlmd file name with full path(either relative or absolute). +-h, --help show this help message and exit. +-e [exec_uuid], --execution_uuid [exec_uuid] Specify execution uuid. +-f [file_name], --file_name [file_name] Specify mlmd file name with full path(either relative or absolute). ``` ### cmf metadata push ``` -Usage: cmf metadata push [-h] -p [pipeline_name] -f [file_name] -e [exec_id] -t [tensorboard] +Usage: cmf metadata push [-h] -p [pipeline_name] -f [file_name] -e [exec_uuid] -t [tensorboard] ``` `cmf metadata push` command pushes the metadata file from the local machine to the cmf-server. ``` -cmf metadata push -p 'pipeline-name' -f '/path/to/mlmd-file-name' -e 'execution_id' -t '/path/to/tensorboard-log' +cmf metadata push -p 'pipeline-name' -f '/path/to/mlmd-file-name' -e 'execution_uuid' -t '/path/to/tensorboard-log' ``` Required Arguments ``` @@ -314,10 +314,10 @@ Required Arguments ``` Optional Arguments ``` - -h, --help show this help message and exit. - -f [file_name], --file_name [file_name] Specify mlmd file name. - -e [exec_id], --execution [exec_id] Specify execution id. - -t [tensorboard], --tensorboard [tensorboard] Specify path to tensorboard logs for the pipeline. + -h, --help show this help message and exit. + -f [file_name], --file_name [file_name] Specify mlmd file name. + -e [exec_uuid], --execution [exec_uuid] Specify execution uuid. + -t [tensorboard], --tensorboard [tensorboard] Specify path to tensorboard logs for the pipeline. ``` ### cmf metadata export ``` @@ -329,7 +329,7 @@ cmf metadata export -p 'pipeline-name' -j '/path/to/json-file-name' -f '/path/to ``` Required Arguments ``` --p [pipeline_name], --pipeline_name [pipeline_name] Specify Pipeline name. +-p [pipeline_name], --pipeline_name [pipeline_name] Specify Pipeline name. ``` Optional Arguments ``` @@ -345,21 +345,21 @@ Usage: cmf execution [-h] {list} `cmf execution` command to displays executions from the MLMD file. ### cmf executions list ``` -Usage: cmf execution list [-h] -p [pipeline_name] -f [file_name] -e [execution_id] +Usage: cmf execution list [-h] -p [pipeline_name] -f [file_name] -e [execution_uuid] ``` `cmf execution list` command to displays executions from the MLMD file with a few properties in a 7-column table, limited to 20 records per page. ``` -cmf execution list -p 'pipeline_name' -f '/path/to/mlmd-file-name' -e 'execution_id' +cmf execution list -p 'pipeline_name' -f '/path/to/mlmd-file-name' -e 'execution_uuid' ``` Required Arguments ``` - -p [pipeline_name], --pipeline-name [pipeline_name] Specify Pipeline name. + -p [pipeline_name], --pipeline-name [pipeline_name] Specify Pipeline name. ``` Optional Arguments ``` - -h, --help show this help message and exit. - --f [file_name], --file-name [file_name] Specify the absolute or relative path for the input MLMD file. - -e [exe_id], --execution_id [exe_id] Specify the execution id to retrieve execution. + -h, --help show this help message and exit. + --f [file_name], --file-name [file_name] Specify the absolute or relative path for the input MLMD file. + -e [exe_uuid], --execution_id [exe_uuid] Specify the execution uuid to retrieve execution. ``` ## cmf pipeline @@ -388,39 +388,38 @@ Usage: cmf repo [-h] {push, pull} `cmf repo` command push and pull artifacts, metadata files, and source code to and from the user's artifact repository, cmf-server, and git respectively. ### cmf repo push ``` -Usage: cmf repo push [-h] -p [pipeline_name] -f [file_name] -e [exec_id] -t [tensorboard] +Usage: cmf repo push [-h] -p [pipeline_name] -f [file_name] -e [exec_uuid] -t [tensorboard] ``` `cmf repo push` command push artifacts, metadata files, and source code to the user's artifact repository, cmf-server, and git respectively. ``` -cmf repo push -p 'pipeline-name' -f '/path/to/mlmd-file-name' -e 'execution_id' -t 'tensorboard_log_path' +cmf repo push -p 'pipeline-name' -f '/path/to/mlmd-file-name' -e 'execution_uuid' -t 'tensorboard_log_path' ``` Required Arguments ``` - -p [pipeline_name], --pipeline-name [pipeline_name] Specify Pipeline name. + -p [pipeline_name], --pipeline-name [pipeline_name] Specify Pipeline name. ``` Optional Arguments ``` - -h, --help show this help message and exit. - -f [file_name], --file-name [file_name] Specify mlmd file name. - -e [exec_id], --execution [exec_id] Specify execution id. - -t [tensorboard], --tensorboard [tensorboard] Specify path to tensorboard logs for the pipeline. + -h, --help show this help message and exit. + -f [file_name], --file-name [file_name] Specify mlmd file name. + -e [exec_uuid], --execution_uuid [exec_uuid] Specify execution uuid. + -t [tensorboard], --tensorboard [tensorboard] Specify path to tensorboard logs for the pipeline. ``` ### cmf repo pull ``` -Usage: cmf repo pull [-h] -p [pipeline_name] -f [file_name] -e [exec_id] -a [artifact_name] +Usage: cmf repo pull [-h] -p [pipeline_name] -f [file_name] -e [exec_uuid] ``` `cmf repo pull` command pull artifacts, metadata files, and source code from the user's artifact repository, cmf-server, and git respectively. ``` -cmf repo pull -p 'pipeline-name' -f '/path/to/mlmd-file-name' -e 'execution_id' -a 'artifact_name' +cmf repo pull -p 'pipeline-name' -f '/path/to/mlmd-file-name' -e 'execution_uuid' ``` Required Arguments ``` - -p [pipeline_name], --pipeline-name [pipeline_name] Specify Pipeline name. + -p [pipeline_name], --pipeline-name [pipeline_name] Specify Pipeline name. ``` Optional Arguments ``` - -h, --help show this help message and exit. - -f [file_name], --file-name [file_name] Specify mlmd file name. - -e [exec_id], --execution [exec_id] Specify execution id. - -a [artifact_name], --artifact_name [artifact_name] Specify the artifact name. + -h, --help show this help message and exit. + -f [file_name], --file-name [file_name] Specify mlmd file name. + -e [exec_uuid], --execution_uuid [exec_uuid] Specify execution uuid. ``` From fab258abdae23f6d1354cfdd852edef68a9abb29 Mon Sep 17 00:00:00 2001 From: AyeshaSanadi Date: Fri, 24 Jan 2025 16:04:02 +0530 Subject: [PATCH 38/41] Removed duplicate uuid from executions and display it inside gui --- ui/src/components/ExecutionTable/index.jsx | 1 - ui/src/pages/executions/index.jsx | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ui/src/components/ExecutionTable/index.jsx b/ui/src/components/ExecutionTable/index.jsx index bbe2687f..d8cfc1ec 100644 --- a/ui/src/components/ExecutionTable/index.jsx +++ b/ui/src/components/ExecutionTable/index.jsx @@ -27,7 +27,6 @@ const ExecutionTable = ({ executions, onSort, onFilter }) => { const [expandedRow, setExpandedRow] = useState(null); const consistentColumns = []; - console.log("executions",executions); useEffect(() => { // Set initial sorting order when component mounts diff --git a/ui/src/pages/executions/index.jsx b/ui/src/pages/executions/index.jsx index ff004f76..ebb07a1a 100644 --- a/ui/src/pages/executions/index.jsx +++ b/ui/src/pages/executions/index.jsx @@ -95,6 +95,10 @@ const Executions = () => { filterValue, ) .then((data) => { + // Removing repeated execution uuid from executions. + data.items.map((data, index) => ( + data.Execution_uuid = [...new Set(data.Execution_uuid.split(","))].join(",") + )); setExecutions(data.items); setTotalItems(data.total_items); }); From 5031e28517354d3ad4a3b1f1a09dc7c2f2ca87ba Mon Sep 17 00:00:00 2001 From: AyeshaSanadi Date: Fri, 24 Jan 2025 16:05:27 +0530 Subject: [PATCH 39/41] Renamed variable name --- cmflib/commands/init/amazonS3.py | 4 ++-- cmflib/commands/init/local.py | 4 ++-- cmflib/commands/init/minioS3.py | 4 ++-- cmflib/commands/init/sshremote.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cmflib/commands/init/amazonS3.py b/cmflib/commands/init/amazonS3.py index 69e4e2af..793559cc 100644 --- a/cmflib/commands/init/amazonS3.py +++ b/cmflib/commands/init/amazonS3.py @@ -39,7 +39,7 @@ def run(self): # Reading CONFIG_FILE variable cmf_config = os.environ.get("CONFIG_FILE", ".cmfconfig") - required_args = { + cmd_args = { "url": self.args.url, "access-key-id": self.args.access_key_id, "secret-key": self.args.secret_key, @@ -49,7 +49,7 @@ def run(self): "neo4j-password" : self.args.neo4j_password, "neo4j_uri" : self.args.neo4j_uri } - for arg_name, arg_value in required_args.items(): + for arg_name, arg_value in cmd_args.items(): if arg_value: if arg_value[0] == "": raise MissingArgument(arg_name) diff --git a/cmflib/commands/init/local.py b/cmflib/commands/init/local.py index 718e08b9..be35bded 100644 --- a/cmflib/commands/init/local.py +++ b/cmflib/commands/init/local.py @@ -37,7 +37,7 @@ def run(self): # Reading CONFIG_FILE variable cmf_config = os.environ.get("CONFIG_FILE", ".cmfconfig") - required_args = { + cmd_args = { "path": self.args.path, "git-remote-url": self.args.git_remote_url, "neo4j-user" : self.args.neo4j_user, @@ -45,7 +45,7 @@ def run(self): "neo4j_uri" : self.args.neo4j_uri } - for arg_name, arg_value in required_args.items(): + for arg_name, arg_value in cmd_args.items(): if arg_value: if arg_value[0] == "": raise MissingArgument(arg_name) diff --git a/cmflib/commands/init/minioS3.py b/cmflib/commands/init/minioS3.py index f0b80db7..cd798f79 100644 --- a/cmflib/commands/init/minioS3.py +++ b/cmflib/commands/init/minioS3.py @@ -39,7 +39,7 @@ def run(self): # Reading CONFIG_FILE variable cmf_config = os.environ.get("CONFIG_FILE", ".cmfconfig") - required_args = { + cmd_args = { "url": self.args.url, "endpoint-url": self.args.endpoint_url, "access-key-id": self.args.access_key_id, @@ -50,7 +50,7 @@ def run(self): "neo4j_uri" : self.args.neo4j_uri } - for arg_name, arg_value in required_args.items(): + for arg_name, arg_value in cmd_args.items(): if arg_value: if arg_value[0] == "": raise MissingArgument(arg_name) diff --git a/cmflib/commands/init/sshremote.py b/cmflib/commands/init/sshremote.py index a85badae..eb82ea76 100644 --- a/cmflib/commands/init/sshremote.py +++ b/cmflib/commands/init/sshremote.py @@ -38,7 +38,7 @@ def run(self): # Reading CONFIG_FILE variable cmf_config = os.environ.get("CONFIG_FILE", ".cmfconfig") - required_args = { + cmd_args = { "path": self.args.path, "user": self.args.user, "port": self.args.port, @@ -49,7 +49,7 @@ def run(self): "neo4j_uri" : self.args.neo4j_uri } - for arg_name, arg_value in required_args.items(): + for arg_name, arg_value in cmd_args.items(): if arg_value: if arg_value[0] == "": raise MissingArgument(arg_name) From d042b52e3d5625d1b04371609fc337d1e7cb43a4 Mon Sep 17 00:00:00 2001 From: AyeshaSanadi Date: Fri, 24 Jan 2025 16:06:43 +0530 Subject: [PATCH 40/41] Added validation, Fixed -a issue and added proper comment --- cmflib/cmf.py | 51 ++-- cmflib/cmf_commands_wrapper.py | 26 +- cmflib/cmf_merger.py | 2 +- cmflib/cmfquery.py | 33 ++- cmflib/commands/artifact/list.py | 32 +-- cmflib/commands/artifact/pull.py | 323 +++++++++++----------- cmflib/commands/artifact/push.py | 28 +- cmflib/commands/execution/list.py | 29 +- cmflib/commands/metadata/export.py | 39 ++- cmflib/commands/metadata/pull.py | 24 +- cmflib/commands/metadata/push.py | 203 +++++++------- cmflib/commands/repo/push.py | 3 +- examples/example-get-started/src/parse.py | 1 + 13 files changed, 394 insertions(+), 400 deletions(-) diff --git a/cmflib/cmf.py b/cmflib/cmf.py index fe282b63..19100d55 100644 --- a/cmflib/cmf.py +++ b/cmflib/cmf.py @@ -1998,42 +1998,42 @@ def commit_existing(self, uri: str, props: t.Optional[t.Dict] = None, custom_pro # print(last) # os.symlink(str(index), slicedir + "/ " + last) -def metadata_push(pipeline_name: str, filepath = "./mlmd", tensorboard_path: str = "", execution_id: str = ""): +def metadata_push(pipeline_name: str, filepath = "./mlmd", tensorboard_path: str = "", execution_uuid: str = ""): """ Pushes MLMD file to CMF-server. Example: ```python - result = metadata_push("example_pipeline", "mlmd_file", "3") + result = metadata_push("example_pipeline", "mlmd_file", "eg_execution_uuid") ``` Args: pipeline_name: Name of the pipeline. filepath: Path to the MLMD file. - execution_id: Optional execution ID. + execution_uuid: Optional execution UUID. tensorboard_path: Path to tensorboard logs. Returns: Response output from the _metadata_push function. """ # Required arguments: pipeline_name - # Optional arguments: Execution_ID, filepath (mlmd file path, tensorboard_path - output = _metadata_push(pipeline_name, filepath, execution_id, tensorboard_path) + # Optional arguments: Execution_UUID, filepath (mlmd file path), tensorboard_path + output = _metadata_push(pipeline_name, filepath, execution_uuid, tensorboard_path) return output -def metadata_pull(pipeline_name: str, filepath = "./mlmd", execution_id: str = ""): +def metadata_pull(pipeline_name: str, filepath = "./mlmd", execution_uuid: str = ""): """ Pulls MLMD file from CMF-server. Example: ```python - result = metadata_pull("example_pipeline", "./mlmd_directory", "execution_123") + result = metadata_pull("example_pipeline", "./mlmd_directory", "eg_execution_uuid") ``` Args: pipeline_name: Name of the pipeline. filepath: File path to store the MLMD file. - execution_id: Optional execution ID. + execution_uuid: Optional execution UUID. Returns: Message from the _metadata_pull function. """ # Required arguments: pipeline_name - #Optional arguments: Execution_ID, filepath(file path to store mlmd file) - output = _metadata_pull(pipeline_name, filepath, execution_id) + #Optional arguments: Execution_UUID, filepath(file path to store mlmd file) + output = _metadata_pull(pipeline_name, filepath, execution_uuid) return output def metadata_export(pipeline_name: str, jsonfilepath: str = "", filepath = "./mlmd"): @@ -2333,23 +2333,23 @@ def pipeline_list(filepath = "./mlmd"): return output -def execution_list(pipeline_name: str, filepath = "./mlmd", execution_id: str = ""): +def execution_list(pipeline_name: str, filepath = "./mlmd", execution_uuid: str = ""): """Displays executions from the MLMD file with a few properties in a 7-column table, limited to 20 records per page. Example: ```python - result = _execution_list("example_pipeline", "./mlmd_directory", "example_execution_id") + result = _execution_list("example_pipeline", "./mlmd_directory", "example_execution_uuid") ``` Args: pipeline_name: Name of the pipeline. filepath: Path to store the mlmd file. - execution_id: Executions for particular execution id. + execution_uuid: Executions for particular execution uuid. Returns: Output from the _execution_list function. """ # Required arguments: pipeline_name - # Optional arguments: filepath( path to store mlmd file), execution_id - output = _execution_list(pipeline_name, filepath, execution_id) + # Optional arguments: filepath( path to store mlmd file), execution_uuid + output = _execution_list(pipeline_name, filepath, execution_uuid) return output @@ -2373,43 +2373,42 @@ def artifact_list(pipeline_name: str, filepath = "./mlmd", artifact_name: str = return output -def repo_push(pipeline_name: str, filepath = "./mlmd", tensorboard_path: str = "", execution_id: str = ""): +def repo_push(pipeline_name: str, filepath = "./mlmd", tensorboard_path: str = "", execution_uuid: str = ""): """ Push artifacts, metadata files, and source code to the user's artifact repository, cmf-server, and git respectively. Example: ```python - result = _repo_push("example_pipeline", "./mlmd_directory", "example_execution_id", "./tensorboard_path") + result = _repo_push("example_pipeline", "./mlmd_directory", "example_execution_uuid", "./tensorboard_path") ``` Args: pipeline_name: Name of the pipeline. filepath: Path to store the mlmd file. - execution_id: Executions for particular execution id. + execution_uuid: Executions for particular execution uuid. tensorboard_path: Path to tensorboard logs. Returns: Output from the _repo_push function. """ # Required arguments: pipeline_name - # Optional arguments: filepath, execution_id, tensorboard_path - output = _repo_push(pipeline_name, filepath, execution_id, tensorboard_path) + # Optional arguments: filepath, execution_uuid, tensorboard_path + output = _repo_push(pipeline_name, filepath, execution_uuid, tensorboard_path) return output -def repo_pull(pipeline_name: str, filepath = "./mlmd", artifact_name: str = "", execution_id: str = ""): +def repo_pull(pipeline_name: str, filepath = "./mlmd", execution_uuid: str = ""): """ Pull artifacts, metadata files, and source code from the user's artifact repository, cmf-server, and git respectively. Example: ```python - result = _repo_pull("example_pipeline", "./mlmd_directory", "example_artifact_name", "example_execution_id") + result = _repo_pull("example_pipeline", "./mlmd_directory", "example_execution_uuid") ``` Args: pipeline_name: Name of the pipeline. filepath: Path to store the mlmd file. - artifact_name: Artifacts for particular artifact name. - execution_id: Executions for particular execution id. + execution_uuid: Executions for particular execution uuid. Returns: Output from the _repo_pull function. """ # Required arguments: pipeline_name - # Optional arguments: filepath, artifact_name, execution_id - output = _repo_pull(pipeline_name, filepath, artifact_name, execution_id) + # Optional arguments: filepath, execution_uuid + output = _repo_pull(pipeline_name, filepath, execution_uuid) return output \ No newline at end of file diff --git a/cmflib/cmf_commands_wrapper.py b/cmflib/cmf_commands_wrapper.py index 23090af6..c656f626 100644 --- a/cmflib/cmf_commands_wrapper.py +++ b/cmflib/cmf_commands_wrapper.py @@ -17,7 +17,7 @@ from cmflib import cli -def _metadata_push(pipeline_name, file_name, execution_id, tensorboard): +def _metadata_push(pipeline_name, file_name, execution_uuid, tensorboard): cli_args = cli.parse_args( [ "metadata", @@ -27,7 +27,7 @@ def _metadata_push(pipeline_name, file_name, execution_id, tensorboard): "-f", file_name, "-e", - execution_id, + execution_uuid, "-t", tensorboard ] @@ -37,7 +37,7 @@ def _metadata_push(pipeline_name, file_name, execution_id, tensorboard): print(msg) return msg -def _metadata_pull(pipeline_name, file_name, execution_id): +def _metadata_pull(pipeline_name, file_name, execution_uuid): cli_args = cli.parse_args( [ "metadata", @@ -47,7 +47,7 @@ def _metadata_pull(pipeline_name, file_name, execution_id): "-f", file_name, "-e", - execution_id, + execution_uuid, ] ) cmd = cli_args.func(cli_args) @@ -316,7 +316,7 @@ def _pipeline_list(file_name): print(msg) return msg -def _execution_list(pipeline_name, file_name, execution_id): +def _execution_list(pipeline_name, file_name, execution_uuid): cli_args = cli.parse_args( [ "execution", @@ -326,7 +326,7 @@ def _execution_list(pipeline_name, file_name, execution_id): "-f", file_name, "-e", - execution_id + execution_uuid ] ) cmd = cli_args.func(cli_args) @@ -334,7 +334,7 @@ def _execution_list(pipeline_name, file_name, execution_id): print(msg) return msg -def _repo_push(pipeline_name, file_name, tensorboard_path, execution_id): +def _repo_push(pipeline_name, file_name, tensorboard_path, execution_uuid): cli_args = cli.parse_args( [ "repo", @@ -344,7 +344,7 @@ def _repo_push(pipeline_name, file_name, tensorboard_path, execution_id): "-f", file_name, "-e", - execution_id, + execution_uuid, "-t", tensorboard_path ] @@ -354,19 +354,17 @@ def _repo_push(pipeline_name, file_name, tensorboard_path, execution_id): print(msg) return msg -def _repo_pull(pipeline_name, file_name, artifact_name, execution_id): +def _repo_pull(pipeline_name, file_name, execution_uuid): cli_args = cli.parse_args( [ - "execution", - "list", + "repo", + "pull", "-p", pipeline_name, "-f", file_name, - "-a", - artifact_name, "-e", - execution_id + execution_uuid ] ) cmd = cli_args.func(cli_args) diff --git a/cmflib/cmf_merger.py b/cmflib/cmf_merger.py index 70d6494a..978ff8e4 100644 --- a/cmflib/cmf_merger.py +++ b/cmflib/cmf_merger.py @@ -61,7 +61,7 @@ def parse_json_to_mlmd(mlmd_json, path_to_store: str, cmd: str, exec_uuid: Union if exec_uuid in execution['properties']["Execution_uuid"].split(",") ] else: - return "Invalid execution id given." + return "Invalid execution uuid given." for execution in list_executions: # Iterates over all the executions try: _ = cmf_class.merge_created_context( diff --git a/cmflib/cmfquery.py b/cmflib/cmfquery.py index 6a40895a..68f613a8 100644 --- a/cmflib/cmfquery.py +++ b/cmflib/cmfquery.py @@ -239,23 +239,18 @@ def _get_stages(self, pipeline_id: int) -> t.List[mlpb.Context]: """ return self.store.get_children_contexts_by_context(pipeline_id) - def _get_executions(self, stage_id: int, execution_uuid: t.Optional[str] = None) -> t.List[mlpb.Execution]: + def _get_executions(self, stage_id: int, execution_id: t.Optional[int] = None) -> t.List[mlpb.Execution]: """Return executions of the given stage. Args: stage_id: Stage identifier. - execution_uuid: If not None, return only execution with this uuid. + execution_id: If not None, return execution with this ID. Returns: List of executions matching input parameters. """ executions: t.List[mlpb.Execution] = self.store.get_executions_by_context(stage_id) - if execution_uuid is not None: - executions_list = executions - executions = [] - for execution in executions_list: - exec_uuid_list = execution.properties['Execution_uuid'].string_value.split(",") - if execution_uuid in exec_uuid_list: - executions.append(execution) + if execution_id is not None: + executions = [execution for execution in executions if execution.id == execution_id] return executions def _get_executions_by_input_artifact_id(self, artifact_id: int,pipeline_id: str = None) -> t.List[int]: @@ -923,7 +918,7 @@ def _get_node_attributes(_node: t.Union[mlpb.Context, mlpb.Execution, mlpb.Event pipeline_attrs = _get_node_attributes(pipeline, {"stages": []}) for stage in self._get_stages(pipeline.id): stage_attrs = _get_node_attributes(stage, {"executions": []}) - for execution in self._get_executions(stage.id, execution_uuid=exec_uuid): + for execution in self.get_all_executions_by_uuid(stage.id, execution_uuid=exec_uuid): # name will be an empty string for executions that are created with # create new execution as true(default) # In other words name property will there only for execution @@ -984,6 +979,24 @@ def get_all_executions_for_artifact_id(self, artifact_id: int) -> pd.DataFrame: except: return df return df + + def get_all_executions_by_uuid(self, stage_id: int, execution_uuid: t.Optional[str] = None) -> t.List[mlpb.Execution]: + """Return executions of the given stage. + Args: + stage_id: Stage identifier. + execution_uuid: If not None, return execution with this uuid. + Returns: + List of executions matching input parameters. + """ + executions: t.List[mlpb.Execution] = self.store.get_executions_by_context(stage_id) + if execution_uuid is None: + return executions + executions_with_uuid = [] + for execution in executions: + exec_uuid_list = execution.properties['Execution_uuid'].string_value.split(",") + if execution_uuid in exec_uuid_list: + executions_with_uuid.append(execution) + return executions_with_uuid """def materialize(self, artifact_name:str): artifacts = self.store.get_artifacts() diff --git a/cmflib/commands/artifact/list.py b/cmflib/commands/artifact/list.py index 9670d040..8f1b5e75 100644 --- a/cmflib/commands/artifact/list.py +++ b/cmflib/commands/artifact/list.py @@ -135,16 +135,23 @@ def search_artifact(self, df: pd.DataFrame) -> Union[int, List[int]]: return -1 def run(self): - + cmd_args = { + "file_name": self.args.file_name, + "pipeline_name": self.args.pipeline_name, + "artifact_name": self.args.artifact_name + } + for arg_name, arg_value in cmd_args.items(): + if arg_value: + if arg_value[0] == "": + raise MissingArgument(arg_name) + elif len(arg_value) > 1: + raise DuplicateArgumentNotAllowed(arg_name,("-"+arg_name[0])) + # default path for mlmd file name mlmd_file_name = "./mlmd" current_directory = os.getcwd() if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. - elif len(self.args.file_name) > 1: # If the user provided more than one file name. - raise DuplicateArgumentNotAllowed("file_name", "-f") - elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). - raise MissingArgument("file name") else: mlmd_file_name = self.args.file_name[0].strip() if mlmd_file_name == "mlmd": @@ -152,17 +159,12 @@ def run(self): current_directory = os.path.dirname(mlmd_file_name) if not os.path.exists(mlmd_file_name): raise FileNotFound(mlmd_file_name, current_directory) + # Creating cmfquery object. query = cmfquery.CmfQuery(mlmd_file_name) # Check if pipeline exists in mlmd. - if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: - raise DuplicateArgumentNotAllowed("pipeline_name", "-p") - elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). - raise MissingArgument("pipeline name") - else: - pipeline_name = self.args.pipeline_name[0] - + pipeline_name = self.args.pipeline_name[0] df = query.get_all_artifacts_by_context(pipeline_name) if df.empty: @@ -170,10 +172,6 @@ def run(self): else: if not self.args.artifact_name: # If self.args.artifact_name is None or an empty list ([]). pass - elif len(self.args.artifact_name) > 1: # If the user provided more than one artifact_name. - raise DuplicateArgumentNotAllowed("artifact_name", "-a") - elif not self.args.artifact_name[0]: # self.args.artifact_name[0] is an empty string (""). - raise MissingArgument("artifact name") else: artifact_ids = self.search_artifact(df) if(artifact_ids != -1): @@ -223,7 +221,7 @@ def run(self): break return MsgSuccess(msg_str = "End of records..") else: - raise ArtifactNotFound(self.args.artifact_name) + raise ArtifactNotFound(self.args.artifact_name[0]) df = self.convert_to_datetime(df, "create_time_since_epoch") self.display_table(df) diff --git a/cmflib/commands/artifact/pull.py b/cmflib/commands/artifact/pull.py index 3637f943..c5babd10 100644 --- a/cmflib/commands/artifact/pull.py +++ b/cmflib/commands/artifact/pull.py @@ -46,10 +46,6 @@ from cmflib.cmf_exception_handling import CmfNotConfigured class CmdArtifactPull(CmdBase): - - def __init__(self, args): - self.args = args - def split_url_pipeline(self, url: str, pipeline_name: str): # This function takes url and pipeline_name as a input parameter # return string which contains the artifact repo path of the artifact @@ -157,26 +153,48 @@ def extract_repo_args(self, type: str, name: str, url: str, current_directory: s return "", "", "" def search_artifact(self, input_dict, remote): + flag = True + artifact_name = self.args.artifact_name[0] + # This function takes input_dict as input artifact for name, url in input_dict.items(): if not isinstance(url, str): continue - # Splitting the 'name' using ':' as the delimiter and storing the first argument in the 'name' variable. - name = name.split(":")[0] + # Splitting the 'name' using ':' as the delimiter and storing the first argument in the 'file_path' variable. + # eg name = ./a/data.xml.gz:12345abcd --> a/data.xml.gz + file_path = name.split(":")[0] # Splitting the path on '/' to extract the file name, excluding the directory structure. - file_name = name.split('/')[-1] - if file_name == self.args.artifact_name and remote == "osdf": + # eg name = ./a/data.xml.gz --> data.xml.gz + file_name = file_path.split('/')[-1] + + if remote == "osdf": artifact_hash = name = name.split(":")[1] return name, url, artifact_hash - else: - return name, url + elif name == artifact_name or file_path == artifact_name or file_name == artifact_name: + flag = False + break + if flag: + raise ArtifactNotFound(artifact_name) + return name, url def run(self): output = DvcConfig.get_dvc_config() # pulling dvc config if type(output) is not dict: raise CmfNotConfigured(output) + cmd_args = { + "file_name": self.args.file_name, + "pipeline_name": self.args.pipeline_name, + "artifact_name": self.args.artifact_name + } + for arg_name, arg_value in cmd_args.items(): + if arg_value: + if arg_value[0] == "": + raise MissingArgument(arg_name) + elif len(arg_value) > 1: + raise DuplicateArgumentNotAllowed(arg_name,("-"+arg_name[0])) + # check whether 'mlmd' file exist in current directory # or in the directory provided by user # pipeline_name = self.args.pipeline_name @@ -184,10 +202,6 @@ def run(self): mlmd_file_name = "./mlmd" if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. - elif len(self.args.file_name) > 1: # If the user provided more than one file name. - raise DuplicateArgumentNotAllowed("file_name", "-f") - elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). - raise MissingArgument("file name") else: mlmd_file_name = self.args.file_name[0].strip() if "/" not in mlmd_file_name: @@ -196,21 +210,13 @@ def run(self): if not self.args.artifact_name: # If self.args.artifact_name[0] is None or an empty list ([]). pass - elif len(self.args.artifact_name) > 1: # If the user provided more than one artifact_name. - raise DuplicateArgumentNotAllowed("artifact_name", "-a") - elif not self.args.artifact_name[0]: # self.args.artifact_name[0] is an empty string (""). - raise MissingArgument("artifact name") if not os.path.exists(mlmd_file_name): #checking if MLMD files exists raise FileNotFound(mlmd_file_name, current_directory) query = cmfquery.CmfQuery(mlmd_file_name) - if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: - raise DuplicateArgumentNotAllowed("pipeline_name", "-p") - elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). - raise MissingArgument("pipeline name") - elif not query.get_pipeline_id(self.args.pipeline_name[0]) > 0: #checking if pipeline name exists in mlmd - raise PipelineNotFound(self.args.pipeline_name) + if not query.get_pipeline_id(self.args.pipeline_name[0]) > 0: #checking if pipeline name exists in mlmd + raise PipelineNotFound(self.args.pipeline_name[0]) # getting all pipeline stages[i.e Prepare, Featurize, Train and Evaluate] stages = query.get_pipeline_stages(self.args.pipeline_name[0]) @@ -261,43 +267,40 @@ def run(self): # output[0] = artifact_name # output[1] = url # output[2] = hash - if output is None: - raise ArtifactNotFound(self.args.artifact_name[0]) - else: - # Extract repository arguments specific to MinIO. - minio_args = self.extract_repo_args("minio", output[0], output[1], current_directory) + # Extract repository arguments specific to MinIO. + minio_args = self.extract_repo_args("minio", output[0], output[1], current_directory) - # Check if the object name doesn't end with `.dir` (indicating it's a file). - if not minio_args[1].endswith(".dir"): - # Download a single file from MinIO. - object_name, download_loc, download_flag = minio_class_obj.download_file( - current_directory, - minio_args[0], # bucket_name - minio_args[1], # object_name - minio_args[2], # path_name - ) - if download_flag: - # Return success if the file is downloaded successfully. - return ObjectDownloadSuccess(object_name, download_loc) - else: - raise ObjectDownloadFailure(object_name) + # Check if the object name doesn't end with `.dir` (indicating it's a file). + if not minio_args[1].endswith(".dir"): + # Download a single file from MinIO. + object_name, download_loc, download_flag = minio_class_obj.download_file( + current_directory, + minio_args[0], # bucket_name + minio_args[1], # object_name + minio_args[2], # path_name + ) + if download_flag: + # Return success if the file is downloaded successfully. + return ObjectDownloadSuccess(object_name, download_loc) + else: + raise ObjectDownloadFailure(object_name) + else: + # If object name ends with `.dir`, download multiple files from a directory + # return total_files_in_directory, files_downloaded + total_files_in_directory, dir_files_downloaded, download_flag = minio_class_obj.download_directory( + current_directory, + minio_args[0], # bucket_name + minio_args[1], # object_name + minio_args[2], # path_name + ) + + if download_flag: + # Return success if all files in the directory are downloaded. + return BatchDownloadSuccess(dir_files_downloaded) else: - # If object name ends with `.dir`, download multiple files from a directory - # return total_files_in_directory, files_downloaded - total_files_in_directory, dir_files_downloaded, download_flag = minio_class_obj.download_directory( - current_directory, - minio_args[0], # bucket_name - minio_args[1], # object_name - minio_args[2], # path_name - ) - - if download_flag: - # Return success if all files in the directory are downloaded. - return BatchDownloadSuccess(dir_files_downloaded) - else: - # Calculate the number of files that failed to download. - file_failed_to_download = total_files_in_directory - dir_files_downloaded - raise BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) + # Calculate the number of files that failed to download. + file_failed_to_download = total_files_in_directory - dir_files_downloaded + raise BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) else: # Handle the case where no specific artifact name is provided. @@ -361,36 +364,32 @@ def run(self): output = self.search_artifact(name_url_dict, dvc_config_op["core.remote"]) # output[0] = name # output[1] = url - - if output is None: - raise ArtifactNotFound(self.args.artifact_name[0]) + # Extract repository arguments specific to Local repo. + local_args = self.extract_repo_args("local", output[0], output[1], current_directory) + # local_args [0] = current_dvc_loc + # local_args [1] = download_loc + # Check if the object name doesn't end with `.dir` (indicating it's a file). + if not local_args[0].endswith(".dir"): + # Download a single file from Local. + object_name, download_loc, download_flag = local_class_obj.download_file(current_directory, local_args[0], local_args[1]) + if download_flag: + # Return success if the file is downloaded successfully. + return ObjectDownloadSuccess(object_name, download_loc) + else: + raise ObjectDownloadFailure(object_name) + else: - # Extract repository arguments specific to Local repo. - local_args = self.extract_repo_args("local", output[0], output[1], current_directory) - # local_args [0] = current_dvc_loc - # local_args [1] = download_loc - # Check if the object name doesn't end with `.dir` (indicating it's a file). - if not local_args[0].endswith(".dir"): - # Download a single file from Local. - object_name, download_loc, download_flag = local_class_obj.download_file(current_directory, local_args[0], local_args[1]) - if download_flag: - # Return success if the file is downloaded successfully. - return ObjectDownloadSuccess(object_name, download_loc) - else: - raise ObjectDownloadFailure(object_name) - + # If object name ends with `.dir`, download multiple files from a directory + # return total_files_in_directory, files_downloaded + total_files_in_directory, dir_files_downloaded, download_flag = local_class_obj.download_directory(current_directory, local_args[0], local_args[1]) + + if download_flag: + # Return success if all files in the directory are downloaded. + return BatchDownloadSuccess(dir_files_downloaded) else: - # If object name ends with `.dir`, download multiple files from a directory - # return total_files_in_directory, files_downloaded - total_files_in_directory, dir_files_downloaded, download_flag = local_class_obj.download_directory(current_directory, local_args[0], local_args[1]) - - if download_flag: - # Return success if all files in the directory are downloaded. - return BatchDownloadSuccess(dir_files_downloaded) - else: - # Calculate the number of files that failed to download. - file_failed_to_download = total_files_in_directory - dir_files_downloaded - raise BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) + # Calculate the number of files that failed to download. + file_failed_to_download = total_files_in_directory - dir_files_downloaded + raise BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) else: # Handle the case where no specific artifact name is provided. files_downloaded = 0 @@ -447,42 +446,39 @@ def run(self): output = self.search_artifact(name_url_dict, dvc_config_op["core.remote"]) # output[0] = name # output[1] = url - if output is None: - raise ArtifactNotFound(self.args.artifact_name) + # Extract repository arguments specific to ssh-remote. + args = self.extract_repo_args("ssh", output[0], output[1], current_directory) + # Check if the object name doesn't end with `.dir` (indicating it's a file). + if not args[1].endswith(".dir"): + # Download a single file from ssh-remote. + object_name, download_loc, download_flag = sshremote_class_obj.download_file( + args[0], # host, + current_directory, + args[1], # remote_loc of the artifact + args[2] # name + ) + if download_flag: + # Return success if the file is downloaded successfully. + return ObjectDownloadSuccess(object_name, download_loc) + else: + raise ObjectDownloadFailure(object_name) + else: - # Extract repository arguments specific to ssh-remote. - args = self.extract_repo_args("ssh", output[0], output[1], current_directory) - # Check if the object name doesn't end with `.dir` (indicating it's a file). - if not args[1].endswith(".dir"): - # Download a single file from ssh-remote. - object_name, download_loc, download_flag = sshremote_class_obj.download_file( - args[0], # host, - current_directory, - args[1], # remote_loc of the artifact - args[2] # name + # If object name ends with `.dir`, download multiple files from a directory + # return total_files_in_directory, files_downloaded + total_files_in_directory, dir_files_downloaded, download_flag = sshremote_class_obj.download_directory( + args[0], # host, + current_directory, + args[1], # remote_loc of the artifact + args[2] # name ) - if download_flag: - # Return success if the file is downloaded successfully. - return ObjectDownloadSuccess(object_name, download_loc) - else: - raise ObjectDownloadFailure(object_name) - - else: - # If object name ends with `.dir`, download multiple files from a directory - # return total_files_in_directory, files_downloaded - total_files_in_directory, dir_files_downloaded, download_flag = sshremote_class_obj.download_directory( - args[0], # host, - current_directory, - args[1], # remote_loc of the artifact - args[2] # name - ) - if download_flag: - # Return success if all files in the directory are downloaded. - return BatchDownloadSuccess(dir_files_downloaded) - else: - # Calculate the number of files that failed to download. - file_failed_to_download = total_files_in_directory - dir_files_downloaded - raise BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) + if download_flag: + # Return success if all files in the directory are downloaded. + return BatchDownloadSuccess(dir_files_downloaded) + else: + # Calculate the number of files that failed to download. + file_failed_to_download = total_files_in_directory - dir_files_downloaded + raise BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) else: # Handle the case where no specific artifact name is provided. files_downloaded = 0 @@ -555,25 +551,22 @@ def run(self): # output[0] = name # output[1] = url # output[3]=artifact_hash - if output is None: - raise ArtifactNotFound(self.args.artifact_name[0]) + args = self.extract_repo_args("osdf", output[0], output[1], current_directory) + download_flag, message = osdfremote_class_obj.download_artifacts( + dvc_config_op, + args[0], # s_url of the artifact + cache_path, + current_directory, + args[1], # download_loc of the artifact + args[2], # name of the artifact + output[3] #Artifact Hash + ) + + if download_flag : + status = MsgSuccess(msg_str = message) else: - args = self.extract_repo_args("osdf", output[0], output[1], current_directory) - download_flag, message = osdfremote_class_obj.download_artifacts( - dvc_config_op, - args[0], # s_url of the artifact - cache_path, - current_directory, - args[1], # download_loc of the artifact - args[2], # name of the artifact - output[3] #Artifact Hash - ) - - if download_flag : - status = MsgSuccess(msg_str = message) - else: - status = MsgFailure(msg_str = message) - return status + status = MsgFailure(msg_str = message) + return status else: for name, url in name_url_dict.items(): total_files_count += 1 @@ -611,35 +604,31 @@ def run(self): output = self.search_artifact(name_url_dict, dvc_config_op["core.remote"]) # output[0] = name # output[1] = url - if output is None: - raise ArtifactNotFound(self.args.artifact_name[0]) - else: - args = self.extract_repo_args("amazons3", output[0], output[1], current_directory) - if args[0] and args[1] and args[2]: - if not args[1].endswith(".dir"): - object_name, download_loc, download_flag = amazonS3_class_obj.download_file( - current_directory, - args[0], # bucket_name - args[1], # object_name - args[2], # download_loc - ) - if download_flag: - return ObjectDownloadSuccess(object_name, download_loc) - else: - return ObjectDownloadFailure(object_name) - else: - total_files_in_directory, dir_files_downloaded, download_flag = amazonS3_class_obj.download_directory(current_directory, - args[0], # bucket_name - args[1], # object_name - args[2], # download_loc - ) + args = self.extract_repo_args("amazons3", output[0], output[1], current_directory) + if args[0] and args[1] and args[2]: + if not args[1].endswith(".dir"): + object_name, download_loc, download_flag = amazonS3_class_obj.download_file( + current_directory, + args[0], # bucket_name + args[1], # object_name + args[2], # download_loc + ) if download_flag: - return BatchDownloadSuccess(dir_files_downloaded) - else: - file_failed_to_download = total_files_in_directory - dir_files_downloaded - raise BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) - - + return ObjectDownloadSuccess(object_name, download_loc) + else: + return ObjectDownloadFailure(object_name) + else: + total_files_in_directory, dir_files_downloaded, download_flag = amazonS3_class_obj.download_directory(current_directory, + args[0], # bucket_name + args[1], # object_name + args[2], # download_loc + ) + if download_flag: + return BatchDownloadSuccess(dir_files_downloaded) + else: + file_failed_to_download = total_files_in_directory - dir_files_downloaded + raise BatchDownloadFailure(dir_files_downloaded, file_failed_to_download) + else: files_downloaded = 0 files_failed_to_download = 0 diff --git a/cmflib/commands/artifact/push.py b/cmflib/commands/artifact/push.py index 3ad27182..998373d2 100644 --- a/cmflib/commands/artifact/push.py +++ b/cmflib/commands/artifact/push.py @@ -35,7 +35,8 @@ CmfNotConfigured, ArtifactPushSuccess, MissingArgument, - DuplicateArgumentNotAllowed) + DuplicateArgumentNotAllowed +) class CmdArtifactPush(CmdBase): def run(self): @@ -50,6 +51,16 @@ def run(self): if output.find("'cmf' is not configured.") != -1: raise CmfNotConfigured(output) + cmd_args = { + "file_name": self.args.file_name, + "pipeline_name": self.args.pipeline_name + } + for arg_name, arg_value in cmd_args.items(): + if arg_value: + if arg_value[0] == "": + raise MissingArgument(arg_name) + elif len(arg_value) > 1: + raise DuplicateArgumentNotAllowed(arg_name,("-"+arg_name[0])) out_msg = check_minio_server(dvc_config_op) if dvc_config_op["core.remote"] == "minio" and out_msg != "SUCCESS": @@ -71,10 +82,6 @@ def run(self): current_directory = os.getcwd() if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. - elif len(self.args.file_name) > 1: # If the user provided more than one file name. - raise DuplicateArgumentNotAllowed("file_name", "-f") - elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). - raise MissingArgument("file name") else: mlmd_file_name = self.args.file_name[0].strip() if mlmd_file_name == "mlmd": @@ -82,17 +89,12 @@ def run(self): current_directory = os.path.dirname(mlmd_file_name) if not os.path.exists(mlmd_file_name): raise FileNotFound(mlmd_file_name, current_directory) + # creating cmfquery object query = cmfquery.CmfQuery(mlmd_file_name) - # Put a check to see whether pipline exists or not - if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: - raise DuplicateArgumentNotAllowed("pipeline_name", "-p") - elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). - raise MissingArgument("pipeline name") - else: - pipeline_name = self.args.pipeline_name[0] - + pipeline_name = self.args.pipeline_name[0] + # Put a check to see whether pipline exists or not if not query.get_pipeline_id(pipeline_name) > 0: raise PipelineNotFound(pipeline_name) diff --git a/cmflib/commands/execution/list.py b/cmflib/commands/execution/list.py index 8ea705f4..146775b7 100644 --- a/cmflib/commands/execution/list.py +++ b/cmflib/commands/execution/list.py @@ -81,13 +81,21 @@ def display_table(self, df: pd.DataFrame) -> None: start_index = end_index def run(self): + cmd_args = { + "file_name": self.args.file_name, + "pipeline_name": self.args.pipeline_name, + "execution_uuid": self.args.execution_uuid + } + for arg_name, arg_value in cmd_args.items(): + if arg_value: + if arg_value[0] == "": + raise MissingArgument(arg_name) + elif len(arg_value) > 1: + raise DuplicateArgumentNotAllowed(arg_name,("-"+arg_name[0])) + current_directory = os.getcwd() if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. - elif len(self.args.file_name) > 1: # If the user provided more than one file name. - raise DuplicateArgumentNotAllowed("file_name", "-f") - elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). - raise MissingArgument("file name") else: mlmd_file_name = self.args.file_name[0].strip() if mlmd_file_name == "mlmd": @@ -100,14 +108,7 @@ def run(self): # Creating cmfquery object. query = cmfquery.CmfQuery(mlmd_file_name) - # Check if pipeline exists in mlmd. - if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: - raise DuplicateArgumentNotAllowed("pipeline_name", "-p") - elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). - raise MissingArgument("pipeline name") - else: - pipeline_name = self.args.pipeline_name[0] - + pipeline_name = self.args.pipeline_name[0] df = query.get_all_executions_in_pipeline(pipeline_name) # Check if the DataFrame is empty, indicating the pipeline name does not exist. @@ -121,10 +122,6 @@ def run(self): # Process execution ID if provided if not self.args.execution_uuid: # If self.args.execution_uuid is None or an empty list ([]). pass - elif len(self.args.execution_uuid) > 1: # If the user provided more than one execution_uuid. - raise DuplicateArgumentNotAllowed("execution_uuid", "-e") - elif not self.args.execution_uuid[0]: # self.args.execution_uuid[0] is an empty string (""). - raise MissingArgument("execution uuid") else: df = df[df['Execution_uuid'].apply(lambda x: self.args.execution_uuid[0] in x.split(","))] # Used dataframe based on execution uuid if not df.empty: diff --git a/cmflib/commands/metadata/export.py b/cmflib/commands/metadata/export.py index 3fdbaf06..d56ebdfc 100644 --- a/cmflib/commands/metadata/export.py +++ b/cmflib/commands/metadata/export.py @@ -27,7 +27,9 @@ DuplicateArgumentNotAllowed, MissingArgument, NoChangesMadeInfo, - MetadataExportToJson + MetadataExportToJson, + DirectoryNotfound, + MsgFailure ) # This class export local mlmd data to a json file @@ -42,20 +44,28 @@ def create_full_path(self, current_directory: str, json_file_name: str) -> str: full_path_to_dump = json_file_name return full_path_to_dump else: - return f"{current_directory} doesn't exists." + raise DirectoryNotfound(current_directory) else: - return "Provide path with file name." + raise MsgFailure(msg_str = "Provide path with file name.") def run(self): + cmd_args = { + "file_name": self.args.file_name, + "pipeline_name": self.args.pipeline_name, + "json_file_name": self.args.json_file_name + } + for arg_name, arg_value in cmd_args.items(): + if arg_value: + if arg_value[0] == "": + raise MissingArgument(arg_name) + elif len(arg_value) > 1: + raise DuplicateArgumentNotAllowed(arg_name,("-"+arg_name[0])) + current_directory = os.getcwd() full_path_to_dump = "" if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. - elif len(self.args.file_name) > 1: # If the user provided more than one file name. - raise DuplicateArgumentNotAllowed("file_name", "-f") - elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). - raise MissingArgument("file name") else: mlmd_file_name = self.args.file_name[0].strip() # Removing starting and ending whitespaces. if mlmd_file_name == "mlmd": @@ -68,23 +78,12 @@ def run(self): # Initialising cmfquery class. query = cmfquery.CmfQuery(mlmd_file_name) - # Check if pipeline exists in mlmd . - if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: - raise DuplicateArgumentNotAllowed("pipeline_name", "-p") - elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). - raise MissingArgument("pipeline name") - else: - pipeline_name = self.args.pipeline_name[0] - + pipeline_name = self.args.pipeline_name[0] pipeline = query.get_pipeline_id(pipeline_name) if pipeline > 0: if not self.args.json_file_name: # If self.args.json_file_name is None or an empty list ([]). json_file_name = self.args.json_file_name - elif len(self.args.json_file_name) > 1: # If the user provided more than one json file name. - raise DuplicateArgumentNotAllowed("json file", "-j") - elif not self.args.json_file_name[0]: # self.args.json_file_name[0] is an empty string (""). - raise MissingArgument("json file") else: json_file_name = self.args.json_file_name[0].strip() @@ -112,7 +111,7 @@ def run(self): full_path_to_dump = os.getcwd() + f"/{pipeline_name}.json" # Pulling data from local mlmd file. - json_payload = query.dumptojson(pipeline_name,None) + json_payload = query.dumptojson(pipeline_name, None) # Write metadata into json file. with open(full_path_to_dump, 'w') as f: diff --git a/cmflib/commands/metadata/pull.py b/cmflib/commands/metadata/pull.py index 0cec9508..c066f477 100644 --- a/cmflib/commands/metadata/pull.py +++ b/cmflib/commands/metadata/pull.py @@ -55,23 +55,23 @@ def run(self): cmd = "pull" status = 0 exec_uuid = None - if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: - raise DuplicateArgumentNotAllowed("pipeline_name", "-p") - elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). - raise MissingArgument("pipeline name") + + cmd_args = { + "file_name": self.args.file_name, + "pipeline_name": self.args.pipeline_name, + "execution_uuid": self.args.execution_uuid + } + for arg_name, arg_value in cmd_args.items(): + if arg_value: + if arg_value[0] == "": + raise MissingArgument(arg_name) + elif len(arg_value) > 1: + raise DuplicateArgumentNotAllowed(arg_name,("-"+arg_name[0])) if not self.args.execution_uuid: # If self.args.execution_uuid[0] is None or an empty list ([]). pass - elif len(self.args.execution_uuid) > 1: # If the user provided more than one execution_uuid. - raise DuplicateArgumentNotAllowed("execution_uuid", "-e") - elif not self.args.execution_uuid[0]: # self.args.execution_uuid[0] is an empty string (""). - raise MissingArgument("execution_uuid") if self.args.file_name: # setting directory where mlmd file will be dumped - if len(self.args.file_name) > 1: # If the user provided more than one file name. - raise DuplicateArgumentNotAllowed("file_name", "-f") - elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). - raise MissingArgument("file name") if not os.path.isdir(self.args.file_name[0]): temp = os.path.dirname(self.args.file_name[0]) if temp != "": diff --git a/cmflib/commands/metadata/push.py b/cmflib/commands/metadata/push.py index aff10260..1ac443be 100644 --- a/cmflib/commands/metadata/push.py +++ b/cmflib/commands/metadata/push.py @@ -56,13 +56,23 @@ def run(self): attr_dict = CmfConfig.read_config(config_file_path) url = attr_dict.get("cmf-server-ip", "http://127.0.0.1:80") + cmd_args = { + "file_name": self.args.file_name, + "pipeline_name": self.args.pipeline_name, + "execution_uuid": self.args.execution_uuid, + "tensorboad": self.args.tensorboard + } + for arg_name, arg_value in cmd_args.items(): + if arg_value: + if arg_value[0] == "": + raise MissingArgument(arg_name) + elif len(arg_value) > 1: + raise DuplicateArgumentNotAllowed(arg_name,("-"+arg_name[0])) + + current_directory = os.getcwd() if not self.args.file_name: # If self.args.file_name is None or an empty list ([]). mlmd_file_name = "./mlmd" # Default path for mlmd file name. - elif len(self.args.file_name) > 1: # If the user provided more than one file name. - raise DuplicateArgumentNotAllowed("file_name", "-f") - elif not self.args.file_name[0]: # self.args.file_name[0] is an empty string (""). - raise MissingArgument("file name") else: mlmd_file_name = self.args.file_name[0].strip() if mlmd_file_name == "mlmd": @@ -80,108 +90,95 @@ def run(self): status_code = 0 # Checks if pipeline name exists - if self.args.pipeline_name is not None and len(self.args.pipeline_name) > 1: - raise DuplicateArgumentNotAllowed("pipeline_name", "-p") - elif not self.args.pipeline_name[0]: # self.args.pipeline_name[0] is an empty string (""). - raise MissingArgument("pipeline name") - else: - pipeline_name = self.args.pipeline_name[0] - if pipeline_name in query.get_pipeline_names(): - print("metadata push started") - print("........................................") - # converts mlmd file to json format - json_payload = query.dumptojson(pipeline_name, None) - - # checks if execution is given by user - if not self.args.execution_uuid: # If self.args.execution_uuid is None or an empty list ([]). - exec_uuid = None - response = server_interface.call_mlmd_push(json_payload, url, exec_uuid, pipeline_name) - elif len(self.args.execution_uuid) > 1: # If the user provided more than one execution. - raise DuplicateArgumentNotAllowed("execution_uuid", "-e") - elif not self.args.execution_uuid[0]: # self.args.execution_uuid[0] is an empty string (""). - raise MissingArgument("execution_uuid") - else: - exec_uuid = self.args.execution_uuid[0] - mlmd_data = json.loads(json_payload)["Pipeline"] - # checks if given execution present in mlmd - for i in mlmd_data[0]["stages"]: - for j in i["executions"]: - # created exec_uuid of list if multiple uuid present for single execution. - # for eg: f9da581c-d16c-11ef-9809-9350156ed1ac,32f17f4a-d16d-11ef-9809-9350156ed1ac - uuid_list = j['properties']['Execution_uuid'].split(",") - # check if user specified exec_uuid exists inside local mlmd - if exec_uuid in uuid_list: - execution_flag = 1 - # calling mlmd_push api to push mlmd_data = json.loads(json_payload)["Pipeline"] - # checks if given execution present in mlmdmlmd file to cmf-server - response = server_interface.call_mlmd_push( - json_payload, url, exec_uuid, pipeline_name - ) - break - if execution_flag == 0: - raise ExecutionUUIDNotFound(exec_uuid) - status_code = response.status_code - if status_code == 200: - output = "" - display_output = "" - if response.json()['status']=="success": - display_output = "mlmd is successfully pushed." - output = MlmdFilePushSuccess(mlmd_file_name) - if response.json()["status"]=="exists": - display_output = "Executions already exists." - output = ExecutionsAlreadyExists() - if not self.args.tensorboard: - return output - elif len(self.args.tensorboard) > 1: # If the user provided more than one tensorboard name. - raise DuplicateArgumentNotAllowed("tensorboard", "-t") - elif not self.args.tensorboard[0]: # self.args.tensorboard[0] is an empty string (""). - raise MissingArgument("tensorboard") - print(display_output) - # /tensorboard api call is done only if mlmd push is successfully completed - # tensorboard parameter is passed - print("......................................") - print("tensorboard logs upload started!!") - print("......................................") - - - tensorboard = self.args.tensorboard[0] - # check if the path provided is for a file - if os.path.isfile(tensorboard): - file_name = os.path.basename(tensorboard) - tresponse = server_interface.call_tensorboard(url, pipeline_name, file_name, tensorboard) - tstatus_code = tresponse.status_code - if tstatus_code == 200: - # give status code as success - return TensorboardPushSuccess(file_name) - else: - # give status code as failure - return TensorboardPushFailure(file_name,tresponse.text) - # If path provided is a directory - elif os.path.isdir(tensorboard): - # Recursively push all files and subdirectories - for root, dirs, files in os.walk(tensorboard): - for file_name in files: - file_path = os.path.join(root, file_name) - relative_path = os.path.relpath(file_path, tensorboard) - tresponse = server_interface.call_tensorboard(url, pipeline_name, relative_path, file_path) - if tresponse.status_code == 200: - print(f"tensorboard logs: File {file_name} uploaded successfully.") - else: - # give status as failure - return TensorboardPushFailure(file_name,tresponse.text) - return TensorboardPushSuccess() + pipeline_name = self.args.pipeline_name[0] + if pipeline_name in query.get_pipeline_names(): + print("metadata push started") + print("........................................") + # converts mlmd file to json format + json_payload = query.dumptojson(pipeline_name, None) + + # checks if execution is given by user + if not self.args.execution_uuid: # If self.args.execution_uuid is None or an empty list ([]). + exec_uuid = None + response = server_interface.call_mlmd_push(json_payload, url, exec_uuid, pipeline_name) + else: + exec_uuid = self.args.execution_uuid[0] + mlmd_data = json.loads(json_payload)["Pipeline"] + # checks if given execution present in mlmd + for i in mlmd_data[0]["stages"]: + for j in i["executions"]: + # created exec_uuid of list if multiple uuid present for single execution. + # for eg: f9da581c-d16c-11ef-9809-9350156ed1ac,32f17f4a-d16d-11ef-9809-9350156ed1ac + uuid_list = j['properties']['Execution_uuid'].split(",") + # check if user specified exec_uuid exists inside local mlmd + if exec_uuid in uuid_list: + execution_flag = 1 + # calling mlmd_push api to push mlmd_data = json.loads(json_payload)["Pipeline"] + # checks if given execution present in mlmdmlmd file to cmf-server + response = server_interface.call_mlmd_push( + json_payload, url, exec_uuid, pipeline_name + ) + break + if execution_flag == 0: + raise ExecutionUUIDNotFound(exec_uuid) + status_code = response.status_code + if status_code == 200: + output = "" + display_output = "" + if response.json()['status']=="success": + display_output = "mlmd is successfully pushed." + output = MlmdFilePushSuccess(mlmd_file_name) + if response.json()["status"]=="exists": + display_output = "Executions already exists." + output = ExecutionsAlreadyExists() + if not self.args.tensorboard: + return output + print(display_output) + # /tensorboard api call is done only if mlmd push is successfully completed + # tensorboard parameter is passed + print("......................................") + print("tensorboard logs upload started!!") + print("......................................") + + + tensorboard = self.args.tensorboard[0] + # check if the path provided is for a file + if os.path.isfile(tensorboard): + file_name = os.path.basename(tensorboard) + tresponse = server_interface.call_tensorboard(url, pipeline_name, file_name, tensorboard) + tstatus_code = tresponse.status_code + if tstatus_code == 200: + # give status code as success + return TensorboardPushSuccess(file_name) else: - return InvalidTensorboardFilePath() - elif status_code==422 and response.json()["status"]=="version_update": - raise UpdateCmfVersion - elif status_code == 404: - raise CmfServerNotAvailable - elif status_code == 500: - raise InternalServerError + # give status code as failure + return TensorboardPushFailure(file_name,tresponse.text) + # If path provided is a directory + elif os.path.isdir(tensorboard): + # Recursively push all files and subdirectories + for root, dirs, files in os.walk(tensorboard): + for file_name in files: + file_path = os.path.join(root, file_name) + relative_path = os.path.relpath(file_path, tensorboard) + tresponse = server_interface.call_tensorboard(url, pipeline_name, relative_path, file_path) + if tresponse.status_code == 200: + print(f"tensorboard logs: File {file_name} uploaded successfully.") + else: + # give status as failure + return TensorboardPushFailure(file_name,tresponse.text) + return TensorboardPushSuccess() else: - return "ERROR: Status Code = {status_code}. Unable to push mlmd." + return InvalidTensorboardFilePath() + elif status_code==422 and response.json()["status"]=="version_update": + raise UpdateCmfVersion + elif status_code == 404: + raise CmfServerNotAvailable + elif status_code == 500: + raise InternalServerError else: - raise PipelineNotFound(pipeline_name) + return "ERROR: Status Code = {status_code}. Unable to push mlmd." + else: + raise PipelineNotFound(pipeline_name) def add_parser(subparsers, parent_parser): diff --git a/cmflib/commands/repo/push.py b/cmflib/commands/repo/push.py index 92df8a8e..dc753d68 100644 --- a/cmflib/commands/repo/push.py +++ b/cmflib/commands/repo/push.py @@ -36,7 +36,8 @@ ArtifactPushSuccess, Minios3ServerInactive, CmfNotConfigured, - FileNotFound,) + FileNotFound + ) class CmdRepoPush(CmdBase): diff --git a/examples/example-get-started/src/parse.py b/examples/example-get-started/src/parse.py index 51e4f195..ff890f44 100644 --- a/examples/example-get-started/src/parse.py +++ b/examples/example-get-started/src/parse.py @@ -77,6 +77,7 @@ def parse(input_file: str, output_dir: str) -> None: _ = metawriter.log_dataset(output_ds.train, "output") _ = metawriter.log_dataset(output_ds.test, "output") + # Automatically commits code, ensuring no need to manually commit before using the 'git repo push' command. metawriter.finalize() From 5884aa850bb54be72013013f5672a8d0de81c9cf Mon Sep 17 00:00:00 2001 From: AyeshaSanadi Date: Mon, 27 Jan 2025 17:22:39 +0530 Subject: [PATCH 41/41] Removed execution uuid from expanded row --- ui/src/components/ExecutionTable/index.jsx | 19 ++++++++++++++----- ui/src/pages/executions/index.jsx | 4 ---- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/ui/src/components/ExecutionTable/index.jsx b/ui/src/components/ExecutionTable/index.jsx index d8cfc1ec..f0042f23 100644 --- a/ui/src/components/ExecutionTable/index.jsx +++ b/ui/src/components/ExecutionTable/index.jsx @@ -128,6 +128,11 @@ const ExecutionTable = ({ executions, onSort, onFilter }) => { } }; + const createUniqueUuids = (exe_uuid) =>{ + // Removing repeated execution uuid from executions. + return [...new Set(exe_uuid.split(","))].join(",") + } + return (
{ {expandedRow === index ? "-" : "+"} - {data.Execution_uuid} + {createUniqueUuids(data.Execution_uuid)} {data.Context_Type} {data.Execution} {data.Git_Repo} @@ -211,10 +216,14 @@ const ExecutionTable = ({ executions, onSort, onFilter }) => { return ( - {key} - - {value ? value : "Null"} - + {key !='Execution_uuid' && + <> + {key} + + {value ? value : "Null"} + + + } ); diff --git a/ui/src/pages/executions/index.jsx b/ui/src/pages/executions/index.jsx index ebb07a1a..ff004f76 100644 --- a/ui/src/pages/executions/index.jsx +++ b/ui/src/pages/executions/index.jsx @@ -95,10 +95,6 @@ const Executions = () => { filterValue, ) .then((data) => { - // Removing repeated execution uuid from executions. - data.items.map((data, index) => ( - data.Execution_uuid = [...new Set(data.Execution_uuid.split(","))].join(",") - )); setExecutions(data.items); setTotalItems(data.total_items); });