Skip to content

Commit

Permalink
changed the core functionality of cmf artifact push. (#174)
Browse files Browse the repository at this point in the history
* changed the core functionality of cmf artifact push. Now it takes the pipeline name and the mlmd file name (in case the user gave a metadata file name other than the default 'mlmd') and then pushes only the files mentioned in the mlmd file

* made some changes to speed up the dvc push

* addressing merge conflicts

* removed 'for loop' earlier added for files and skipped metrics files

* Update cmf_client.md

making changes related to cmf artifact push

* Update step-by-step.md

making changes related to cmf artifact push command change

* Update index.md

Updating docs for command argument change
  • Loading branch information
varkha-d-sharma authored Jul 19, 2024
1 parent 172a7a1 commit 49ae445
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 22 deletions.
73 changes: 68 additions & 5 deletions cmflib/commands/artifact/push.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
import argparse
import os
import subprocess
import time

from cmflib import cmfquery
from cmflib.cli.command import CmdBase
from cmflib.cli.utils import check_minio_server
from cmflib.utils.helper_functions import generate_osdf_token
Expand All @@ -38,19 +40,66 @@ def run(self):
out_msg = check_minio_server(dvc_config_op)
if dvc_config_op["core.remote"] == "minio" and out_msg != "SUCCESS":
return out_msg
elif dvc_config_op["core.remote"] == "osdf":
#print("key_id="+cmf_config["osdf-key_id"])
if dvc_config_op["core.remote"] == "osdf":
#print("key_id="+cmf_config["osdf-key_id"])
dynamic_password = generate_osdf_token(cmf_config["osdf-key_id"],cmf_config["osdf-key_path"],cmf_config["osdf-key_issuer"])
#print("Dynamic Password"+dynamic_password)
dvc_add_attribute(dvc_config_op["core.remote"],"password",dynamic_password)
#The Push URL will be something like: https://<Path>/files/md5/[First Two of MD5 Hash]
result = dvc_push()
#print(result)
return result
else:
result = dvc_push()
return result

current_directory = os.getcwd()
# Default path of mlmd file
mlmd_file_name = "./mlmd"
if self.args.file_name:
mlmd_file_name = self.args.file_name
if mlmd_file_name == "mlmd":
mlmd_file_name = "./mlmd"
current_directory = os.path.dirname(mlmd_file_name)
if not os.path.exists(mlmd_file_name):
return f"ERROR: {mlmd_file_name} doesn't exists in {current_directory} directory."

# creating cmfquery object
query = cmfquery.CmfQuery(mlmd_file_name)

# Put a check to see whether pipline exists or not
pipeline_name = self.args.pipeline_name
if not query.get_pipeline_id(pipeline_name) > 0:
return f"ERROR: Pipeline {pipeline_name} doesn't exist!!"

stages = query.get_pipeline_stages(self.args.pipeline_name)
executions = []
identifiers = []

for stage in stages:
# getting all executions for stages
executions = query.get_all_executions_in_stage(stage)
# check if stage has executions
if len(executions) > 0:
# converting it to dictionary
dict_executions = executions.to_dict("dict")
for id in dict_executions["id"].values():
identifiers.append(id)
else:
print("No Executions found for " + stage + " stage.")

names = []
if len(identifiers) == 0: # check if there are no executions
return "No executions found."
for identifier in identifiers:
artifacts = query.get_all_artifacts_for_execution(
identifier
) # getting all artifacts with id
# dropping artifact with type 'metrics' as metrics doesn't have physical file
artifacts = artifacts[artifacts['type'] != 'Metrics']
# adding .dvc at the end of every file as it is needed for pull
artifacts['name'] = artifacts['name'].apply(lambda name: f"{name.split(':')[0]}.dvc")
names.extend(artifacts['name'].tolist())
file_set = set(names)
result = dvc_push(list(file_set))
return result

def add_parser(subparsers, parent_parser):
HELP = "Push artifacts to the user configured artifact repo."
Expand All @@ -63,4 +112,18 @@ def add_parser(subparsers, parent_parser):
formatter_class=argparse.RawDescriptionHelpFormatter,
)

required_arguments = parser.add_argument_group("required arguments")

required_arguments.add_argument(
"-p",
"--pipeline_name",
required=True,
help="Specify Pipeline name.",
metavar="<pipeline_name>",
)

parser.add_argument(
"-f", "--file_name", help="Specify mlmd file name.", metavar="<file_name>"
)

parser.set_defaults(func=CmdArtifactPush)
47 changes: 33 additions & 14 deletions cmflib/dvc_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import subprocess
import dvc.api
import dvc.exceptions

from typing import List, Optional

def check_git_remote() -> bool:
process = ""
Expand Down Expand Up @@ -429,20 +429,39 @@ def dvc_get_config() -> str:


# dvc push
def dvc_push(file_list: Optional[List[str]] = None) -> str:
    """Run ``dvc push`` and return its stripped standard output.

    Args:
        file_list: Optional targets (for example ``.dvc`` files) to pass to
            ``dvc push``. When ``None``, a plain ``dvc push`` is executed
            with no targets. The list is never modified.

    Returns:
        The stripped stdout of the command, or an empty string when the
        command could not be started or failed unexpectedly.
    """
    # Build a fresh argv instead of inserting into the caller's list, so the
    # argument is left untouched and both call styles share one code path.
    command = ['dvc', 'push']
    if file_list is not None:
        command.extend(file_list)

    commit = ""
    process = None
    try:
        process = subprocess.Popen(command,
                                   stdout=subprocess.PIPE,
                                   universal_newlines=True)
        output, errs = process.communicate()
        commit = output.strip()
    except Exception as err:
        print(f"Unexpected {err}, {type(err)}")
        # Popen itself may have raised (e.g. dvc is not installed); only
        # clean up when a process object was actually created.
        if isinstance(process, subprocess.Popen):
            process.kill()
            outs, errs = process.communicate()
            print(f"Unexpected {outs}")
            print(f"Unexpected {errs}")
    return commit
11 changes: 10 additions & 1 deletion docs/cmf_client/cmf_client.md
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,16 @@ Usage: cmf artifact push [-h] -p [pipeline_name] -f [file_name]
```
The `cmf artifact push` command pushes artifacts from the user's local machine to the user-configured artifact repository.
```
cmf artifact push
cmf artifact push -p 'pipeline_name'
```
Required Arguments
```
-p [pipeline_name], --pipeline_name [pipeline_name] Specify Pipeline name.
```
Optional Arguments
```
-h, --help show this help message and exit.
-f [file_name], --file_name [file_name] Specify mlmd file name.
```
## cmf metadata
```
Expand Down
2 changes: 1 addition & 1 deletion docs/cmf_client/step-by-step.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ More information is available inside [Getting Started](https://hewlettpackard.gi

Push artifacts in the artifact repo initialised in the [Initialize cmf](#initialize-cmf) step.
```
cmf artifact push
cmf artifact push -p 'Test-env'
```
Check [Overview](./cmf_client.md) page for more details.

Expand Down
2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ Click the terminal icon<br>
cd example-get-started
cmf init local --path /home/user/local-storage --git-remote-url https://github.com/user/experiment-repo.git --cmf-server-url http://127.0.0.1:80 --neo4j-user neo4j --neo4j-password password --neo4j-uri bolt://localhost:7687
sh test_script.sh
cmf artifact push
cmf artifact push -p 'Test-env'
```
The above steps will run a pre-coded example pipeline, and the metadata will be stored in a file named "mlmd".<br>
The artifacts created will be pushed to configured dvc remote (default: /home/dvc_remote)<br>
Expand Down

0 comments on commit 49ae445

Please sign in to comment.