Merge pull request #227 from dapomeroy/file_upload

Add support to import & upload experiment results
GoogleCloudPlatform · Aug 1, 2023 · d532a6b · d532a6b
2 parents 351d160 + 1efaeec
commit d532a6b
Show file tree

Hide file tree

Showing 9 changed files with 275 additions and 34 deletions.
diff --git a/lib/ramble/ramble/cmd/results.py b/lib/ramble/ramble/cmd/results.py
@@ -0,0 +1,60 @@
+# Copyright 2022-2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+import llnl.util.tty as tty
+import json
+
+import ramble.experimental.uploader
+
+description = "take actions on experiment results"
+section = "results"
+level = "short"
+
+
+def setup_parser(subparser):
+    """Register the subcommands of the `results` command on subparser.
+
+    The name of the selected subcommand is stored under `results_command`.
+    """
+    subcommands = subparser.add_subparsers(
+        metavar='SUBCOMMAND',
+        dest='results_command',
+    )
+
+    # upload: the handler's docstring is reused as the help text
+    upload_help = results_upload.__doc__
+    upload_cmd = subcommands.add_parser('upload', help=upload_help)
+    upload_cmd.add_argument('filename', help='path of file to upload')
+
+
+def results_upload(args):
+    """Imports Ramble experiment results from JSON file and uploads them as
+    specified in the upload block of Ramble's config file."""
+    # NOTE: the docstring above is also shown as the `upload` help text
+    # (setup_parser passes results_upload.__doc__), so edit it with care.
+    results_data = import_results_file(args.filename)
+    ramble.experimental.uploader.upload_results(results_data)
+
+
+def import_results_file(filename):
+    """
+    Import Ramble experiment results from a JSON file.
+
+    Args:
+        filename: path of the JSON results file to read
+
+    Returns:
+        The parsed JSON object, when it is a mapping with a non-empty
+        'experiments' entry.
+
+    Exits through tty.die when the file is not valid JSON or holds no
+    experiments. Errors from open() (e.g. a missing file) propagate.
+    """
+    tty.debug("File to import:")
+    tty.debug(filename)
+
+    # The context manager closes the handle on every exit path, including
+    # the exit triggered by tty.die below.
+    with open(filename) as imported_file:
+        tty.msg("Import file...")
+        try:
+            results_data = json.load(imported_file)
+        except ValueError:
+            # json.JSONDecodeError is a subclass of ValueError
+            tty.die("Error parsing file: Invalid JSON formatting.")
+
+    # Check if data contains an experiment. Valid JSON that is not an object
+    # (a list, string, number, ...) has no .get() and is rejected as well.
+    if isinstance(results_data, dict) and results_data.get('experiments'):
+        return results_data
+
+    tty.die("Error parsing file: Does not contain valid data to upload.")
+
+
+def results(parser, args):
+    """Entry point of the `results` command: run the chosen subcommand.
+
+    Args:
+        parser: parser handed over by the command framework (unused here)
+        args: parsed arguments; `results_command` selects the subcommand
+
+    Exits through tty.die when no known subcommand was selected, instead of
+    failing with a bare KeyError.
+    """
+    action = {'upload': results_upload}
+    handler = action.get(args.results_command)
+    if handler is None:
+        tty.die("results requires a subcommand; available: "
+                + ", ".join(sorted(action)))
+    handler(args)
diff --git a/lib/ramble/ramble/cmd/workspace.py b/lib/ramble/ramble/cmd/workspace.py
@@ -26,6 +26,7 @@
 import ramble.workspace
 import ramble.workspace.shell
 import ramble.experiment_set
+import ramble.experimental.uploader
 import ramble.software_environments
 import ramble.util.colors as rucolor
 
@@ -363,7 +364,7 @@ def workspace_analyze(args):
 
     # FIXME: this currently fires the analyze logic twice
     if args.upload:
-        ws.upload_results()
+        ramble.experimental.uploader.upload_results(ws.results)
 
 
 def workspace_info_setup_parser(subparser):

diff --git a/lib/ramble/ramble/experimental/uploader.py b/lib/ramble/ramble/experimental/uploader.py
@@ -11,20 +11,23 @@
 import sys
 import math
 
+import ramble.config
+from ramble.config import ConfigError
+
 
 default_node_type_val = "Not Specified"
 
 
 class Uploader():
+    """Uploader whose perform_upload only validates its arguments.
+
+    No upload happens here; the method just rejects missing input.
+    """
+
     # TODO: should the class store the base uri?
-    def perform_upload(self, uri, workspace_name, data):
+    def perform_upload(self, uri, data):
+        """Check that both a destination and a payload were supplied.
+
+        Args:
+            uri: upload destination; must be truthy
+            data: results to upload; must be truthy
+
+        Raises:
+            ValueError: if uri or data is empty or missing
+        """
         # TODO: move content checking to __init__ ?
         if not uri:
             raise ValueError(
-                "%s requires %s argument." % (self.__class__, uri))
+                f"{self.__class__} requires uri argument.")
         if not data:
             raise ValueError(
-                "%s requires %s argument." % (self.__class__, data))
+                f"{self.__class__} requires data argument.")
         pass
 
 
@@ -109,6 +112,31 @@ def determine_node_type(experiment, contexts):
             continue
 
 
+def upload_results(results):
+    """Format results and upload them as described by `config:upload`.
+
+    Only the 'BigQuery' upload type is handled. Exits through tty.die when
+    formatting raises KeyError or when `config:upload:uri` is empty.
+
+    Raises:
+        ConfigError: if `config:upload` is empty or missing, or if its
+            type is not 'BigQuery'
+    """
+    if not ramble.config.get('config:upload'):
+        raise ConfigError("Missing correct config:upload parameters")
+
+    # Read upload type and push it there
+    upload_type = ramble.config.get('config:upload:type')
+    if upload_type != 'BigQuery':  # TODO: enum?
+        raise ConfigError("Unknown config:upload:type value")
+
+    try:
+        formatted_data = ramble.experimental.uploader.format_data(results)
+    except KeyError:
+        tty.die("Error parsing file: Does not contain valid data to upload.")
+
+    # TODO: strategy object?
+    uploader = BigQueryUploader()
+
+    uri = ramble.config.get('config:upload:uri')
+    if not uri:
+        tty.die('No upload URI (config:upload:uri) in config.')
+
+    tty.msg('Uploading Results to ' + uri)
+    uploader.perform_upload(uri, formatted_data)
+
+
 def format_data(data_in):
     """
     Goal: convert results to a more searchable and decomposed format for insertion
@@ -118,7 +146,7 @@ def format_data(data_in):
 
     .. code-block:: text
 
-        { expierment_name:
+        { experiment_name:
             { "CONTEXTS": {
                 "context_name": "FOM_name { unit: "value", "value":value" }
             ...}
@@ -197,7 +225,7 @@ def chunked_upload(self, table_id, data):
                 return error
         return error
 
-    def insert_data(self, uri: str, workspace_name, results) -> None:
+    def insert_data(self, uri: str, results) -> None:
 
         # It is expected that the user will create these tables outside of this
         # tooling
@@ -233,13 +261,13 @@ def insert_data(self, uri: str, workspace_name, results) -> None:
             else:
                 tty.die("Encountered errors while inserting rows: {}".format(errors))
 
-    def perform_upload(self, uri, workspace_name, results):
-        super().perform_upload(uri, workspace_name, results)
+    def perform_upload(self, uri, results):
+        """Run the parent's perform_upload, then insert results at uri.
+
+        The parent call comes first (presumably the argument validation in
+        Uploader; the class header is not shown in this hunk). insert_data
+        then receives the same uri and results.
+        """
+        super().perform_upload(uri, results)
 
         # import spack.util.spack_json as sjson
         # json_str = sjson.dump(results)
 
-        self.insert_data(uri, workspace_name, results)
+        self.insert_data(uri, results)
 
     # def get_max_current_id(uri, table):
         # TODO: Generating an id based on the max in use id is dangerous, and
@@ -251,7 +279,7 @@ def perform_upload(self, uri, workspace_name, results):
         # results = query_job.result()  # Waits for job to complete.
         # return results[0]
 
-    def get_expierment_id(experiment):
+    def get_experiment_id(experiment):
         # get_max_current_id(...) # Warning: dangerous..
 
         # This should be stable per machine/python version, but is not

diff --git a/lib/ramble/ramble/test/cmd/results.py b/lib/ramble/ramble/test/cmd/results.py
@@ -0,0 +1,39 @@
+# Copyright 2022-2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+import py
+import pytest
+
+import ramble.paths
+import ramble.cmd.results
+
+INPUT_DATA = py.path.local(ramble.paths.test_path).join('data', 'results_upload')
+
+
+@pytest.mark.parametrize(
+    'filename,expected_output',
+    [
+        (
+            py.path.local(INPUT_DATA).join('test1_empty_experiments.json'),
+            'Error parsing file: Does not contain valid data to upload.',
+        ),
+        (
+            py.path.local(INPUT_DATA).join('test2_not_json.txt.json'),
+            'Error parsing file: Invalid JSON formatting.',
+        ),
+        (
+            py.path.local(INPUT_DATA).join('test3_malformed_json.json'),
+            'Error parsing file: Invalid JSON formatting.',
+        ),
+    ],
+)
+def test_file_import_rejects_invalid_files(filename, expected_output, capsys):
+    """import_results_file must exit and report the matching error.
+
+    Each case feeds one invalid input file and expects SystemExit together
+    with the given message in the captured output.
+    """
+    with pytest.raises(SystemExit):
+        ramble.cmd.results.import_results_file(filename)
+
+    # These checks must sit after the `with` block: statements placed behind
+    # the raising call inside it are never executed.
+    captured = capsys.readouterr()
+    # readouterr() returns an (out, err) result, so search the text of both
+    # streams rather than the result object itself.
+    assert expected_output in captured.out + captured.err
diff --git a/lib/ramble/ramble/test/data/results_upload/test1_empty_experiments.json b/lib/ramble/ramble/test/data/results_upload/test1_empty_experiments.json
@@ -0,0 +1,4 @@
+{
+  "experiments": [
+  ]
+}
diff --git a/lib/ramble/ramble/test/data/results_upload/test2_not_json.txt.json b/lib/ramble/ramble/test/data/results_upload/test2_not_json.txt.json
@@ -0,0 +1 @@
+This is not a JSON file.
diff --git a/lib/ramble/ramble/test/data/results_upload/test3_malformed_json.json b/lib/ramble/ramble/test/data/results_upload/test3_malformed_json.json
@@ -0,0 +1,113 @@
+
+experiments": [
+  {
+    "name": "gromacs.water_gmx50.pme_single_rank",
+    "EXPERIMENT_CHAIN": [],
+    "RAMBLE_STATUS": "SUCCESS",
+    "RAMBLE_VARIABLES": {
+      "log_dir": "/home/user/gromacs_example/logs",
+      "env_name": "gromacs",
+      "experiments_file": "/home/user/gromacs_example/all_experiments",
+      "processes_per_node": "16",
+      "mpi_command": "mpirun -n 1 -ppn 16",
+      "batch_submit": "/home/user/gromacs_example/experiments/gromacs/water_gmx50/pme_single_rank/execute_experiment",
+      "n_ranks": "1",
+      "n_threads": "1",
+      "size": "0003",
+      "type": "pme",
+      "global_conf_name": "global_conf",
+      "base_name": "None",
+      "workspace_name": "gromacs_example",
+      "application_name": "gromacs",
+      "workload_name": "water_gmx50",
+      "experiment_name": "pme_single_rank",
+      "required_name": "None",
+      "application_namespace": "gromacs",
+      "workload_namespace": "gromacs.water_gmx50",
+      "experiment_namespace": "gromacs.water_gmx50.pme_single_rank",
+      "application_run_dir": "/home/user/gromacs_example/experiments/gromacs",
+      "application_input_dir": "/home/user/gromacs_example/inputs/gromacs",
+      "workload_run_dir": "/home/user/gromacs_example/experiments/gromacs/water_gmx50",
+      "workload_input_dir": "/home/user/gromacs_example/inputs/gromacs/water_gmx50",
+      "experiment_run_dir": "/home/user/gromacs_example/experiments/gromacs/water_gmx50/pme_single_rank",
+      "spack_env": "/home/user/gromacs_example/software/gromacs.water_gmx50",
+      "n_nodes": "1",
+      "experiment_template_name": "pme_single_rank",
+      "log_file": "/home/user/gromacs_example/experiments/gromacs/water_gmx50/pme_single_rank/pme_single_rank.out",
+      "input_path": "/home/user/gromacs_example/inputs/gromacs/water_gmx50/water_gmx50_bare/0003",
+      "water_gmx50_bare": "/home/user/gromacs_example/inputs/gromacs/water_gmx50/water_gmx50_bare",
+      "command": "rm -f \"/home/user/gromacs_example/experiments/gromacs/water_gmx50/pme_single_rank/pme_single_rank.out\"\ntouch \"/home/user/gromacs_example/experiments/gromacs/water_gmx50/pme_single_rank/pme_single_rank.out\"\n. /opt/apps/spack/share/spack/setup-env.sh\nspack env activate /home/user/gromacs_example/software/gromacs.water_gmx50\ngmx_mpi grompp -f /home/user/gromacs_example/inputs/gromacs/water_gmx50/water_gmx50_bare/0003/pme.mdp -c /home/user/gromacs_example/inputs/gromacs/water_gmx50/water_gmx50_bare/0003/conf.gro -p /home/user/gromacs_example/inputs/gromacs/water_gmx50/water_gmx50_bare/0003/topol.top -o exp_input.tpr >> \"/home/user/gromacs_example/experiments/gromacs/water_gmx50/pme_single_rank/pme_single_rank.out\"\nmpirun -n 1 -ppn 16 gmx_mpi mdrun -notunepme -dlb yes -v -resethway -noconfout -nsteps 4000 -s exp_input.tpr >> \"/home/user/gromacs_example/experiments/gromacs/water_gmx50/pme_single_rank/pme_single_rank.out\"",
+      "spack_setup": "",
+      "execute_experiment": "/home/user/gromacs_example/experiments/gromacs/water_gmx50/pme_single_rank/execute_experiment"
+    },
+    "RAMBLE_RAW_VARIABLES": {
+      "log_dir": "/home/user/gromacs_example/logs",
+      "env_name": "{application_name}",
+      "experiments_file": "/home/user/gromacs_example/all_experiments",
+      "processes_per_node": 16,
+      "mpi_command": "mpirun -n {n_ranks} -ppn {processes_per_node}",
+      "batch_submit": "{execute_experiment}",
+      "n_ranks": "1",
+      "n_threads": "1",
+      "size": "0003",
+      "type": "pme",
+      "global_conf_name": "global_conf",
+      "base_name": null,
+      "workspace_name": "gromacs_example",
+      "application_name": "gromacs",
+      "workload_name": "water_gmx50",
+      "experiment_name": "pme_single_rank",
+      "required_name": null,
+      "application_namespace": "gromacs",
+      "workload_namespace": "gromacs.water_gmx50",
+      "experiment_namespace": "gromacs.water_gmx50.pme_single_rank",
+      "application_run_dir": "/home/user/gromacs_example/experiments/{application_name}",
+      "application_input_dir": "/home/user/gromacs_example/inputs/{application_name}",
+      "workload_run_dir": "{application_run_dir}/{workload_name}",
+      "workload_input_dir": "{application_input_dir}/{workload_name}",
+      "experiment_run_dir": "{workload_run_dir}/{experiment_name}",
+      "spack_env": "/home/user/gromacs_example/software/{env_name}.{workload_name}",
+      "n_nodes": 1,
+      "experiment_template_name": "pme_single_rank",
+      "log_file": "{experiment_run_dir}/{experiment_name}.out",
+      "input_path": "{water_gmx50_bare}/{size}",
+      "water_gmx50_bare": "/home/user/gromacs_example/inputs/gromacs/water_gmx50/water_gmx50_bare",
+      "command": "rm -f \"{log_file}\"\ntouch \"{log_file}\"\n. /opt/apps/spack/share/spack/setup-env.sh\nspack env activate /home/user/gromacs_example/software/gromacs.water_gmx50\ngmx_mpi grompp -f /home/user/gromacs_example/inputs/gromacs/water_gmx50/water_gmx50_bare/0003/pme.mdp -c /home/user/gromacs_example/inputs/gromacs/water_gmx50/water_gmx50_bare/0003/conf.gro -p /home/user/gromacs_example/inputs/gromacs/water_gmx50/water_gmx50_bare/0003/topol.top -o exp_input.tpr >> \"/home/user/gromacs_example/experiments/gromacs/water_gmx50/pme_single_rank/pme_single_rank.out\"\nmpirun -n 1 -ppn 16 gmx_mpi mdrun -notunepme -dlb yes -v -resethway -noconfout -nsteps 4000 -s exp_input.tpr >> \"/home/user/gromacs_example/experiments/gromacs/water_gmx50/pme_single_rank/pme_single_rank.out\"",
+      "spack_setup": "",
+      "execute_experiment": "/home/user/gromacs_example/experiments/gromacs/water_gmx50/pme_single_rank/execute_experiment"
+    },
+    "CONTEXTS": [
+      {
+        "name": "null",
+        "foms": [
+          {
+            "value": "42.556",
+            "units": "s",
+            "name": "Core Time"
+          },
+          {
+            "value": "21.280",
+            "units": "s",
+            "name": "Wall Time"
+          },
+          {
+            "value": "200.0",
+            "units": "%",
+            "name": "Percent Core Time"
+          },
+          {
+            "value": "16.249",
+            "units": "ns/day",
+            "name": "Nanosecs per day"
+          },
+          {
+            "value": "1.477",
+            "units": "hours/ns",
+            "name": "Hours per nanosec"
+          }
+        ]
+      }
+    ]
+  }
+]
+}
diff --git a/lib/ramble/ramble/workspace/workspace.py b/lib/ramble/ramble/workspace/workspace.py
@@ -33,8 +33,6 @@
 import ramble.keywords
 import ramble.software_environments
 from ramble.mirror import MirrorStats
-from ramble.config import ConfigError
-import ramble.experimental.uploader
 
 import spack.util.spack_yaml as syaml
 import spack.util.spack_json as sjson
@@ -1067,27 +1065,6 @@ def write_json_results(self):
             sjson.dump(self.results, f)
         return out_file
 
-    def upload_results(self):
-        if ramble.config.get('config:upload'):
-            # Read upload type and push it there
-            if ramble.config.get('config:upload:type') == 'BigQuery':  # TODO: enum?
-                formatted_data = ramble.experimental.uploader.format_data(self.results)
-
-                # TODO: strategy object?
-                uploader = ramble.experimental.uploader.BigQueryUploader()
-
-                uri = ramble.config.get('config:upload:uri')
-                if not uri:
-                    tty.die('No upload URI (config:upload:uri) in config.')
-
-                tty.msg('Uploading Results to ' + uri)
-                uploader.perform_upload(uri, self.name, formatted_data)
-            else:
-                raise ConfigError("Unknown config:upload:type value")
-
-        else:
-            raise ConfigError("Missing correct conifg:upload parameters")
-
     def default_results(self):
         res = {}