Update error handling and redesign Streamlit UI for post-processing #281

Merged: 21 commits (Mar 25, 2024)

Changes from all commits
fb170b6
Fixed repeated series addition bug.
pineapple-cat Mar 11, 2024
eaf4e3f
Moved config information to sidebar.
pineapple-cat Mar 11, 2024
79261a7
Added simple streamlit exception message display.
pineapple-cat Mar 11, 2024
4daa43c
Changed config path arg to optional for streamlit and added empty con…
pineapple-cat Mar 13, 2024
3e4a340
Added plot placeholder, updated type hints + docstrings, and updated …
pineapple-cat Mar 15, 2024
35fc6b4
Updated type hints and docstrings.
pineapple-cat Mar 15, 2024
6deeb11
Updated Streamlit usage instructions.
pineapple-cat Mar 15, 2024
afe8957
Fixed type recast bug.
pineapple-cat Mar 15, 2024
66d89c9
Added toggle to show current config and fixed config validation bugs.
pineapple-cat Mar 18, 2024
35381b8
Added a convenience function for interpreting a value as a particular…
pineapple-cat Mar 18, 2024
31dc934
Fixed display filter value formatting and crash caused by uploading a…
pineapple-cat Mar 18, 2024
6593cea
Updated exception placement and plot reset upon error.
pineapple-cat Mar 22, 2024
2a51570
Changed filter addition to re-interpret types of all filters using cu…
pineapple-cat Mar 22, 2024
f4f30e7
Changed config handler filter values to be stored as strings for cons…
pineapple-cat Mar 22, 2024
ab17cb5
Changed axis options to update changes immediately.
pineapple-cat Mar 22, 2024
b354ce9
Updated config upload and made changes to allow smoother handling of …
pineapple-cat Mar 22, 2024
794fa6e
Merge branch 'main' into post-processing_ui-redesign
pineapple-cat Mar 22, 2024
7f2b6ee
Updated FIXME comments with issue numbers.
pineapple-cat Mar 25, 2024
6cff3f2
Fixed config upload bugs.
pineapple-cat Mar 25, 2024
acc5226
Merge branch 'post-processing_ui-redesign' of https://github.com/ukri…
pineapple-cat Mar 25, 2024
473f16e
Minor formatting adjustments for GH Pages docs.
pineapple-cat Mar 25, 2024
21 changes: 12 additions & 9 deletions post-processing/README.md
@@ -6,14 +6,14 @@ The post-processing scripts provided with the ExCALIBUR tests package are intend

There are four main post-processing components:

#### **`Perflog parsing`:**
#### **`Perflog parsing`**
- Data from benchmark performance logs are stored in a pandas DataFrame.
#### **`Data filtering`:**
#### **`Data filtering`**
- If more than one perflog is used for plotting, DataFrames from individual perflogs are concatenated together into one DataFrame.
- The DataFrame is then filtered, keeping only relevant rows and columns.
#### **`Data transformation`:**
#### **`Data transformation`**
- Axis value columns in the DataFrame are scaled according to user specifications.
#### **`Plotting`:**
#### **`Plotting`**
- A filtered and transformed DataFrame is passed to a plotting script, which produces a graph and embeds it in a simple HTML file.
- Users may run the plotting script to generate a generic bar chart. Graph settings should be specified in a configuration YAML file.

@@ -45,9 +45,9 @@ Run `post_processing.py -h` for more information (including debugging flags).

You may also run post-processing with Streamlit to interact with your plots:

```sh
streamlit run streamlit_post_processing.py log_path config_path [-p plot_type]
```
>```streamlit run streamlit_post_processing.py log_path -- [-c config_path]```

The config path is optional when running with Streamlit, as the UI allows you to create a new config on the fly. If you would still like to supply a config path, make sure to include `--` before any post-processing flags to indicate that the arguments belong to the post-processing script rather than Streamlit itself.

### Configuration Structure

@@ -147,10 +147,12 @@ y_axis:
x_value: "x_val_s"

filters:
and: [["filter_col_1", "<=", filter_val_1], ["filter_col_2", "!=", filter_val_2]]
and: [["filter_col_1", "<=", filter_val_1],
["filter_col_2", "!=", filter_val_2]]
or: []

series: [["series_col", "series_val_1"], ["series_col", "series_val_2"]]
series: [["series_col", "series_val_1"],
["series_col", "series_val_2"]]

column_types:
x_axis_col: "str"
@@ -296,5 +298,6 @@ All user-specified types are internally converted to their nullable incarnations
### Future Development

The post-processing capabilities are still a work in progress. Some upcoming developments:

- Embed graphs in GitHub Pages, instead of a bare HTML file.
- Add scaling and regression plots.
75 changes: 63 additions & 12 deletions post-processing/config_handler.py
@@ -1,12 +1,23 @@
from pathlib import Path

import yaml


class ConfigHandler:

def __init__(self, config: dict):
def __init__(self, config: dict, template=False):
"""
Initialise class.

Args:
config: dict, plot configuration information.
template: bool, flag to skip config validation (unsafe).
"""

if not template:
# validate dict structure
config = read_config(config)

# validate dict structure
config = read_config(config)
# extract config information
self.title = config.get("title")
self.x_axis = config.get("x_axis")
@@ -19,6 +30,7 @@ def __init__(self, config: dict):
self.and_filters = []
self.or_filters = []
self.series_filters = []
self.to_string_filter_vals()
self.parse_filters()

# parse scaling information
@@ -34,15 +46,53 @@ def __init__(self, config: dict):
self.parse_columns()

@classmethod
def from_path(cfg_hand, config_path):
return cfg_hand(open_config(config_path))
def from_path(self, config_path: Path, template=False):
"""
Initialise class from a path.
"""
return self(open_config(config_path), template)

@classmethod
def from_template(self):
"""
Initialise class from an empty template. Skips config validation.
"""

return self(dict({
"title": None,
"x_axis": {"value": None, "units": {"custom": None}},
"y_axis": {"value": None, "units": {"custom": None}},
"filters": {"and": [], "or": []},
"series": [],
"column_types": {}}), template=True)
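
For reference, the empty template above can be written out as a standalone dict (field values are placeholders; the real constructor receives it with `template=True` so validation is skipped):

```python
# Empty-template plot config, mirroring the structure passed to the
# constructor above; every value is a placeholder to be filled via the UI.
template = {
    "title": None,
    "x_axis": {"value": None, "units": {"custom": None}},
    "y_axis": {"value": None, "units": {"custom": None}},
    "filters": {"and": [], "or": []},
    "series": [],
    "column_types": {},
}
```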

def get_filters(self):
"""
Return and, or, and series filter lists.
"""
return self.and_filters, self.or_filters, self.series_filters

def get_y_scaling(self):
"""
Return column and custom scaling information.
"""
return self.scaling_column, self.scaling_custom

def to_string_filter_vals(self):
"""
Store filter values as their string representations for internal consistency.
"""

# filters
if self.filters:
self.filters["and"] = ([[f[0], f[1], str(f[2])] for f in self.filters["and"]]
if self.filters.get("and") else [])
self.filters["or"] = ([[f[0], f[1], str(f[2])] for f in self.filters["or"]]
if self.filters.get("or") else [])

# series
self.series = [[s[0], str(s[1])] for s in self.series] if self.series else []

def parse_filters(self):
"""
Store filtering information from filters and series.
@@ -74,8 +124,10 @@ def parse_columns(self):
"""

# axis columns
self.plot_columns = [self.x_axis.get("value"), self.x_axis["units"].get("column"),
self.y_axis.get("value"), self.y_axis["units"].get("column")]
self.plot_columns = [self.x_axis.get("value"),
self.x_axis["units"].get("column") if self.x_axis.get("units") else None,
self.y_axis.get("value"),
self.y_axis["units"].get("column") if self.y_axis.get("units") else None]

# FIXME (issue #255): allow all series values to be selected with *
# (or if only column name is supplied)
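
The guarded lookups in `plot_columns` amount to this None-safe pattern (standalone sketch with hypothetical axis dicts):

```python
# Only reach into the nested "units" dict when the key actually exists,
# so an axis without units yields None instead of a KeyError
# (hypothetical axis configuration).
x_axis = {"value": "tasks", "units": {"column": "tasks_units"}}
y_axis = {"value": "flops_value"}  # no units entry at all

plot_columns = [
    x_axis.get("value"),
    x_axis["units"].get("column") if x_axis.get("units") else None,
    y_axis.get("value"),
    y_axis["units"].get("column") if y_axis.get("units") else None,
]
```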
@@ -131,15 +183,14 @@ def to_yaml(self):
return yaml.dump(self.to_dict(), default_flow_style=None, sort_keys=False)


def open_config(path):
def open_config(path: Path):
"""
Return a dictionary containing configuration information for plotting
from the path to a yaml file.

Args:
path: path, path to yaml config file.
path: Path, path to yaml config file.
"""

with open(path, "r") as file:
return load_config(file)

@@ -154,13 +205,13 @@ def load_config(file):
return yaml.safe_load(file)


def read_config(config):
def read_config(config: dict):
"""
Check required configuration information. At least plot title, x-axis,
y-axis, and column types must be present.

Args:
config: dict, config information.
config: dict, plot configuration information.
"""

# check plot title information
25 changes: 18 additions & 7 deletions post-processing/perflog_handler.py
@@ -3,13 +3,21 @@
import os
import re
from itertools import chain
from pathlib import Path

import pandas as pd


class PerflogHandler:

def __init__(self, log_path, debug=False):
def __init__(self, log_path: Path, debug=False):
"""
Initialise class.

Args:
log_path: Path, path to performance log file or directory.
debug: bool, flag to print additional information to console.
"""

self.log_path = log_path
self.debug = debug
@@ -18,6 +26,9 @@ def __init__(self, log_path, debug=False):
self.read_all_perflogs()

def get_df(self):
"""
Return dataframe containing performance log information.
"""
return self.df

def get_log_files(self):
@@ -80,7 +91,7 @@ def read_all_perflogs(self):
errno.ENOENT, "Could not find a valid perflog in path", self.log_path)


def read_perflog(path):
def read_perflog(path: Path):
"""
Return a pandas dataframe from a reframe performance log. The dataframe will
have columns for all fields in a performance log record except display name,
@@ -92,7 +103,7 @@ def read_perflog(path):
in reframe's configuration. See code.

Args:
path: path, path to log file.
path: Path, path to log file.
"""

# read perflog into dataframe
@@ -127,7 +138,7 @@ def read_perflog(path):
return df


def get_display_name_info(display_name):
def get_display_name_info(display_name: str):
"""
Return a tuple containing the test name and a dictionary of parameter names
and their values from the given input string. The parameter dictionary may be empty
@@ -145,15 +156,15 @@ def get_display_name_info(display_name):
return test_name, dict(params)
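
A minimal standalone sketch of this parsing, assuming ReFrame's `%name=value` display-name convention (the example string is hypothetical, not from the repo):

```python
import re

def get_display_name_info(display_name):
    # test name is everything before the first " %param=value" token
    test_name = display_name.split(" %")[0]
    # collect all "%name=value" pairs into a dict (empty if none are present)
    params = re.findall(r"%(\w+)=(\S+)", display_name)
    return test_name, dict(params)

name, params = get_display_name_info("SombreroBenchmark %tasks=2 %cpus_per_task=1")
```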


def insert_key_cols(df: pd.DataFrame, index, results):
def insert_key_cols(df: pd.DataFrame, index: int, results: 'list[dict]'):
"""
Modify a dataframe to include new columns (extracted from results) inserted at
a given index.

Args:
df: dataframe, to be modified by this function.
df: pd.DataFrame, to be modified by this function.
index: int, index as which to insert new columns into the dataframe.
results: dict list, contains key-value mapping information for all rows.
results: list[dict], contains key-value mapping information for all rows.
"""

# get set of keys from all rows
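
The docstring above can be illustrated with a short pandas sketch (hypothetical data and key layout; a sketch of the idea, not the repo's exact implementation):

```python
import pandas as pd

# Insert per-row parameter columns (extracted from results) into a dataframe
# at a given index: one new column per key found across all result rows.
df = pd.DataFrame({"test_name": ["t1", "t2"], "perf": [1.0, 2.0]})
results = [{"tasks": "2"}, {"tasks": "4"}]
index = 1

# get set of keys from all rows, then insert a column per key;
# rows missing a key receive None via dict.get
keys = sorted({k for row in results for k in row})
for offset, key in enumerate(keys):
    df.insert(index + offset, key, [row.get(key) for row in results])
```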