From 706816f58b32a247e6b89af3c4ebcc49fde35f22 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Mon, 23 Oct 2023 17:07:33 +0100 Subject: [PATCH 01/35] Separated row filters into OR and AND categories. --- post-processing/post_processing.py | 18 ++++++++++-------- post-processing/post_processing_config.yaml | 4 +++- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index 10a5613e..0338dce7 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -82,16 +82,17 @@ def run_post_processing(self, log_path, config): # check acceptable number of series if len(set(series_columns)) > 1: raise RuntimeError("Currently supporting grouping of series by only one column. Please use a single column name in your series configuration.") - # add series columns to column list + # add series columns to dataframe column list for c in series_columns: if c not in columns: columns.append(c) - filters = config["filters"] + and_filters = config["filters"]["and"] + or_filters = config["filters"]["or"] # extract filter columns - filter_columns = [f[0] for f in filters] + filter_columns = [f[0] for f in and_filters] + [f[0] for f in or_filters] # gather all relevant columns - all_columns = columns + filter_columns + all_columns = set(columns + filter_columns) invalid_columns = [] # check for invalid columns @@ -138,12 +139,13 @@ def run_post_processing(self, log_path, config): mask = pd.Series(df.index.notnull()) # filter rows - if filters: - mask = reduce(op.and_, (self.row_filter(f, df) for f in filters)) + if and_filters: + mask = reduce(op.and_, (self.row_filter(f, df) for f in and_filters)) + if or_filters: + mask = mask & reduce(op.or_, (self.row_filter(f, df) for f in or_filters)) # apply series filters if series_filters: - series_mask = reduce(op.or_, (self.row_filter(f, df) for f in series_filters)) - mask = mask & series_mask + mask = mask & reduce(op.or_, (self.row_filter(f, df) for f in series_filters)) # ensure not all rows are filtered away if df[mask].empty: raise pd.errors.EmptyDataError("Filtered dataframe is empty", df[mask].index) diff --git a/post-processing/post_processing_config.yaml b/post-processing/post_processing_config.yaml index 8ca87e69..f24e08a1 100644 --- a/post-processing/post_processing_config.yaml +++ b/post-processing/post_processing_config.yaml @@ -19,7 +19,9 @@ y_axis: # Optional row filters (specify an empty list [] if none are required). # Filter format: [column_name, operator, value] -filters: [["system", "==", "default"]] +filters: + and: [["system", "==", "default"]] + or: [] # Optional setting to display several plots in the same graph. # Number of series, if used, must be >=2 (specify an empty list [] if there is only one series). From 9a777d312ef98f8a4d4be5f294ebc37f78c5a89f Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Mon, 23 Oct 2023 17:11:41 +0100 Subject: [PATCH 02/35] Updated existing tests to account for filtering change. 
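The tests below exercise the new nested filter shape, `{"and": [...], "or": [...]}`, in place of the old flat list. As a minimal sketch of how the two lists combine into one row mask (column names and data here are invented for illustration, not taken from the sombrero logs):

```python
import operator as op
from functools import reduce

import pandas as pd

df = pd.DataFrame({"system": ["default", "default", "other"],
                   "tasks": [1, 2, 2]})

# AND conditions are intersected with each other...
and_mask = reduce(op.and_, [df["system"] == "default"])
# ...and the OR conditions form a union, intersected with the rest
or_mask = reduce(op.or_, [df["tasks"] == 1, df["tasks"] == 2])
mask = and_mask & or_mask
assert list(df[mask].index) == [0, 1]
```
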
--- post-processing/test_post_processing.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/post-processing/test_post_processing.py b/post-processing/test_post_processing.py index a4104530..1b660674 100644 --- a/post-processing/test_post_processing.py +++ b/post-processing/test_post_processing.py @@ -182,7 +182,7 @@ def test_high_level_script(run_sombrero): # check expected failure from invalid (filter) column try: - post_.run_post_processing(sombrero_log_path, {"filters": [["fake_column", "==", 2]], "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}}) + post_.run_post_processing(sombrero_log_path, {"filters": {"and": [["fake_column", "==", 2]], "or": []}, "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}}) except KeyError as e: assert e.args[1] == ["fake_column"] else: @@ -190,7 +190,7 @@ def test_high_level_script(run_sombrero): # check expected failure from invalid filter operator try: - post_.run_post_processing(sombrero_log_path, {"filters": [["tasks", "!!", 2]], "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str"}}) + post_.run_post_processing(sombrero_log_path, {"filters": {"and": [["tasks", "!!", 2]], "or": []}, "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str"}}) except KeyError as e: assert e.args[1] == "!!" 
else: @@ -198,7 +198,7 @@ def test_high_level_script(run_sombrero): # check expected failure from invalid filter value type try: - post_.run_post_processing(sombrero_log_path, {"filters": [["flops_value", ">", "v"]], "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str"}}) + post_.run_post_processing(sombrero_log_path, {"filters": {"and": [["flops_value", ">", "v"]], "or": []}, "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str"}}) except ValueError: assert True else: @@ -206,7 +206,7 @@ def test_high_level_script(run_sombrero): # check expected failure from filtering out every row try: - post_.run_post_processing(sombrero_log_path, {"filters": [["tasks", ">", 2]], "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str"}}) + post_.run_post_processing(sombrero_log_path, {"filters": {"and": [["tasks", ">", 2]], "or": []}, "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str"}}) except pd.errors.EmptyDataError: assert True else: @@ -214,7 +214,7 @@ def test_high_level_script(run_sombrero): # check expected failure from row number vs unique x-axis value number mismatch try: - df = post_.run_post_processing(sombrero_log_path, {"filters": [], "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str"}}) + df = post_.run_post_processing(sombrero_log_path, {"filters": {"and": [], "or": []}, "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str"}}) except RuntimeError: assert True else: @@ -222,7 +222,7 @@ def test_high_level_script(run_sombrero): # check correct display name parsing try: - df = post_.run_post_processing(sombrero_changed_log_path, {"filters": [], "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "cpus_per_task", "units": {"column": "extra_param"}}, "column_types": {"tasks": "int", "cpus_per_task": "int", "extra_param": "int"}}) + df = post_.run_post_processing(sombrero_changed_log_path, {"filters": {"and": [], "or": []}, "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "cpus_per_task", "units": {"column": "extra_param"}}, "column_types": {"tasks": "int", "cpus_per_task": "int", "extra_param": "int"}}) except RuntimeError as e: # three param columns found in changed log EXPECTED_FIELDS = ["tasks", "cpus_per_task", "extra_param"] @@ -232,14 +232,14 @@ def test_high_level_script(run_sombrero): assert False # check correct date filtering - df = post_.run_post_processing(sombrero_changed_log_path, {"title": "Title", "filters": [["job_completion_time", ">", "2000-06-01T12:30:15"]], "series": [], "x_axis": {"value": "job_completion_time", 
"units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"job_completion_time": "datetime", "flops_value": "float", "flops_unit": "str"}}) + df = post_.run_post_processing(sombrero_changed_log_path, {"title": "Title", "filters": {"and": [["job_completion_time", ">", "2000-06-01T12:30:15"]], "or": []}, "series": [], "x_axis": {"value": "job_completion_time", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"job_completion_time": "datetime", "flops_value": "float", "flops_unit": "str"}}) # check returned subset is as expected assert len(df) == 2 # check correct concatenation of two dataframes with different columns try: # get collated dataframe subset - df = post_.run_post_processing(Path(sombrero_log_path).parent, {"filters": [], "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str"}}) + df = post_.run_post_processing(Path(sombrero_log_path).parent, {"filters": {"and": [], "or": []}, "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str"}}) except RuntimeError as e: # dataframe has records from both files assert len(e.args[1]) == 8 @@ -247,7 +247,7 @@ def test_high_level_script(run_sombrero): assert False # get filtered dataframe subset - df = post_.run_post_processing(sombrero_log_path, {"title": "Title", "filters": [["tasks", ">", 1], ["cpus_per_task", "==", 2]], "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str", "cpus_per_task": "int"}}) + df = post_.run_post_processing(sombrero_log_path, {"title": "Title", "filters": {"and": [["tasks", ">", 1], ["cpus_per_task", "==", 2]], "or": []}, "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str", "cpus_per_task": "int"}}) EXPECTED_FIELDS = ["tasks", "flops_value", "flops_unit"] # check returned subset is as expected From a60bad251a2b78ad77e9b344fd31c3cc96708eb0 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Tue, 24 Oct 2023 15:20:06 +0100 Subject: [PATCH 03/35] Updated filter documentation. --- post-processing/README.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/post-processing/README.md b/post-processing/README.md index 1100cf6c..5b87dfb6 100644 --- a/post-processing/README.md +++ b/post-processing/README.md @@ -41,6 +41,8 @@ Before running post-processing, create a config file including all necessary inf - `value` - Axis data points. Specified with a column name. - `units` - Axis units. Specified either with a column name or a custom label (may be null). - `filters` - (Optional.) Filter data rows based on specified conditions. (Specify an empty list if no filters are required.) + - `and` - Filter mask is determined from a logical AND of conditions in list. + - `or` - Filter mask is determined from a logical OR of conditions in list. 
- `Format: [column_name, operator, value]` - `Accepted operators: "==", "!=", "<", ">", "<=", ">="` - `series` - (Optional.) Display several plots in the same graph and group x-axis data by specified column values. (Specify an empty list if there is only one series.) @@ -63,7 +65,9 @@ y_axis: units: column: "unit_col" -filters: [["filter_col_1", "<=", filter_val_1], ["filter_col_2", "!=", filter_val_2]] +filters: + and: [["filter_col_1", "<=", filter_val_1], ["filter_col_2", "!=", filter_val_2]] + or: [] series: [["series_col", "series_val_1"], ["series_col", "series_val_2"]] @@ -85,6 +89,16 @@ The settings above will produce a graph that will have its x-axis data grouped b - (`x_val_2`, `series_val_1`) - (`x_val_2`, `series_val_2`) +#### A Note on Filters + +AND filters, OR filters, and series (treated as special OR filters) are all combined with a logical AND to produce the final filter mask applied to the DataFrame prior to graphing. For example: + +- `and_filters` = `cond1`, `cond2` +- `or_filters`= `cond3`, `cond4` +- `series` = `ser1`, `ser2` + +The filters above would produce the final filter `mask` = (`cond1` AND `cond2`) AND (`cond3` OR `cond4`) AND (`ser1` OR `ser2`). + #### A Note on Column Types All user-specified types are internally converted to their nullable incarnations. As such: From 44c0b8251a6f31f377e161034655a8a1d8983828 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Tue, 24 Oct 2023 16:52:16 +0100 Subject: [PATCH 04/35] Slight filter mask code adjustment. --- post-processing/post_processing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index 0338dce7..b38cdc9c 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -142,10 +142,10 @@ def run_post_processing(self, log_path, config): if and_filters: mask = reduce(op.and_, (self.row_filter(f, df) for f in and_filters)) if or_filters: - mask = mask & reduce(op.or_, (self.row_filter(f, df) for f in or_filters)) + mask &= reduce(op.or_, (self.row_filter(f, df) for f in or_filters)) # apply series filters if series_filters: - mask = mask & reduce(op.or_, (self.row_filter(f, df) for f in series_filters)) + mask &= reduce(op.or_, (self.row_filter(f, df) for f in series_filters)) # ensure not all rows are filtered away if df[mask].empty: raise pd.errors.EmptyDataError("Filtered dataframe is empty", df[mask].index) From 8fe70c2c9527cc999f10faf6901ea43b85818e41 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Wed, 25 Oct 2023 15:45:01 +0100 Subject: [PATCH 05/35] Added OR filter functionality unit test. 
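The union semantics under test, as a small self-contained sketch (made-up data; the real code builds each condition via `row_filter`):

```python
import operator as op
from functools import reduce

import pandas as pd

df = pd.DataFrame({"tasks": [1, 2, 2]})
# A row survives if it satisfies EITHER condition
mask = reduce(op.or_, (op.gt(df["tasks"], 1), op.lt(df["tasks"], 2)))
assert mask.all()  # every row matches at least one of the two
```
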
--- post-processing/test_post_processing.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/post-processing/test_post_processing.py b/post-processing/test_post_processing.py index 1b660674..f1c60194 100644 --- a/post-processing/test_post_processing.py +++ b/post-processing/test_post_processing.py @@ -236,6 +236,11 @@ def test_high_level_script(run_sombrero): # check returned subset is as expected assert len(df) == 2 + # check correct or filtering + df = post_.run_post_processing(sombrero_log_path, {"title": "Title", "filters": {"and": [], "or": [["tasks", ">", "1"], ["tasks", "<", "2"]]}, "series": [["cpus_per_task", "1"], ["cpus_per_task", "2"]], "x_axis": {"value": "cpus_per_task", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"tasks": "int", "cpus_per_task": "int", "flops_value": "float", "flops_unit": "str"}}) + # check returned subset is as expected + assert len(df) == 4 + # check correct concatenation of two dataframes with different columns try: # get collated dataframe subset From c7c92e42c46aaec74b083e2017b85d6fb7bb3955 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Thu, 2 Nov 2023 17:34:34 +0000 Subject: [PATCH 06/35] Removed series implementation information from filter documentation. --- post-processing/README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/post-processing/README.md b/post-processing/README.md index 5b87dfb6..83b0f66c 100644 --- a/post-processing/README.md +++ b/post-processing/README.md @@ -91,13 +91,12 @@ The settings above will produce a graph that will have its x-axis data grouped b #### A Note on Filters -AND filters, OR filters, and series (treated as special OR filters) are all combined with a logical AND to produce the final filter mask applied to the DataFrame prior to graphing. For example: +AND filters and OR filters are combined with a logical AND to produce the final filter mask applied to the DataFrame prior to graphing. For example: - `and_filters` = `cond1`, `cond2` - `or_filters`= `cond3`, `cond4` -- `series` = `ser1`, `ser2` -The filters above would produce the final filter `mask` = (`cond1` AND `cond2`) AND (`cond3` OR `cond4`) AND (`ser1` OR `ser2`). +The filters above would produce the final filter `mask` = (`cond1` AND `cond2`) AND (`cond3` OR `cond4`). #### A Note on Column Types From 380d2bca3cd0d354855342eb4a5fce49dbddce53 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Mon, 6 Nov 2023 16:53:25 +0000 Subject: [PATCH 07/35] Added ability to scale axis values by a column. 
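A minimal sketch of the column-scaling transform this patch introduces (numbers are invented; the real code performs the same dtype check before dividing):

```python
import pandas as pd

df = pd.DataFrame({"flops_value": [3.2, 5.4, 2.4],
                   "OMP_NUM_THREADS": [2, 2, 2]})

# both columns must be numeric before dividing element-wise
assert pd.api.types.is_numeric_dtype(df["flops_value"].dtype)
assert pd.api.types.is_numeric_dtype(df["OMP_NUM_THREADS"].dtype)
df["flops_value"] /= df["OMP_NUM_THREADS"]
print(df["flops_value"].tolist())  # [1.6, 2.7, 1.2]
```
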
--- post-processing/post_processing.py | 65 ++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 7 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index b38cdc9c..c34dcdef 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -10,9 +10,10 @@ from itertools import chain from pathlib import Path +import numpy as np import pandas as pd import yaml -from bokeh.models import Legend, HoverTool +from bokeh.models import HoverTool, Legend from bokeh.models.sources import ColumnDataSource from bokeh.palettes import viridis from bokeh.plotting import figure, output_file, save @@ -91,8 +92,16 @@ def run_post_processing(self, log_path, config): or_filters = config["filters"]["or"] # extract filter columns filter_columns = [f[0] for f in and_filters] + [f[0] for f in or_filters] + + # FIXME: add scaling for x-axis + scaling_columns = [] + # extract scaling columns + if config["y_axis"].get("scaling"): + if config["y_axis"]["scaling"].get("column"): + scaling_columns.append(config["y_axis"]["scaling"]["column"]["name"]) + # gather all relevant columns - all_columns = set(columns + filter_columns) + all_columns = set(columns + filter_columns + scaling_columns) invalid_columns = [] # check for invalid columns @@ -161,6 +170,15 @@ def run_post_processing(self, log_path, config): if num_filtered_rows > num_x_data_points: raise RuntimeError("Unexpected number of rows ({0}) does not match number of unique x-axis values per series ({1})".format(num_filtered_rows, num_x_data_points), df[columns][mask]) + # apply data transformation per series + if series_filters: + for f in series_filters: + m = self.row_filter(f, df) + df[mask & m] = self.transform_axis(df[mask & m], config["y_axis"]) + # apply data transformation to all data + else: + df[mask] = self.transform_axis(df[mask], config["y_axis"]) + print("Selected dataframe:") print(df[columns][mask]) @@ -211,9 +229,9 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): # adjust y-axis range min_y = 0 if min(df[y_column]) >= 0 \ - else math.floor(min(df[y_column])*1.2) + else math.floor(np.nanmin(df[y_column])*1.2) max_y = 0 if max(df[y_column]) <= 0 \ - else math.ceil(max(df[y_column])*1.2) + else math.ceil(np.nanmax(df[y_column])*1.2) # create html file to store plot in output_file(filename=os.path.join(Path(__file__).parent, "{0}.html".format(title.replace(" ", "_"))), title=title) @@ -294,6 +312,31 @@ def row_filter(self, filter, df: pd.DataFrame): return mask + def transform_axis(self, df: pd.DataFrame, axis): + """ + Divide axis values by specified values and reflect this change in the dataframe. + + Args: + df: dataframe, data to plot. + axis: dict, axis column, units, and values to scale by. + """ + + # FIXME: try to make this an in-place process + if axis.get("scaling"): + # scale by column + if axis["scaling"].get("column"): + # check types + if not pd.api.types.is_numeric_dtype(df[axis["value"]].dtype) or \ + not pd.api.types.is_numeric_dtype(df[axis["scaling"]["column"]["name"]].dtype): + # both columns must be numeric + raise TypeError("Cannot scale column '{0}' of type {1} by column '{2}' of type {3}." + .format(axis["value"], df[axis["value"]].dtype, + axis["scaling"]["column"]["name"], + df[axis["scaling"]["column"]["name"]].dtype)) + df[axis["value"]] /= df[axis["scaling"]["column"]["name"]] + + return df + def read_args(): """ Return parsed command line arguments. 
@@ -437,7 +480,7 @@ def get_axis_info(df: pd.DataFrame, axis): Args: df: dataframe, data to plot. - axis: dict, axis column and units. + axis: dict, axis column, units, and values to scale by. """ # get column name of axis @@ -450,9 +493,17 @@ def get_axis_info(df: pd.DataFrame, axis): if len(unit_set) != 1: raise RuntimeError("Unexpected number of axis unit entries {0}".format(unit_set)) units = next(iter(unit_set)) + + # get values to scale by + scaling = None + if axis.get("scaling"): + if axis.get("scaling").get("column"): + scaling = axis.get("scaling").get("column").get("name") + # determine axis label - label = "{0}{1}".format(col_name.replace("_", " ").title(), - " ({0})".format(units) if units else "") + label = "{0}{1}{2}".format(col_name.replace("_", " ").title(), + " Scaled by {0}".format(scaling.replace("_", " ").title()) if scaling else "", + " ({0})".format(units) if units else "") return col_name, label From 35c211e142b8418050bbadd01f872476c669b21d Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Mon, 6 Nov 2023 16:54:17 +0000 Subject: [PATCH 08/35] Added column scaling unit tests. --- post-processing/test_post_processing.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/post-processing/test_post_processing.py b/post-processing/test_post_processing.py index f1c60194..8d5efb93 100644 --- a/post-processing/test_post_processing.py +++ b/post-processing/test_post_processing.py @@ -241,6 +241,17 @@ def test_high_level_script(run_sombrero): # check returned subset is as expected assert len(df) == 4 + # check correct scaling + dfs = post_.run_post_processing(sombrero_log_path, {"title": "Title", "filters": {"and": [["cpus_per_task", "==", 2]], "or": []}, "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}, "scaling": {"column": {"name": "OMP_NUM_THREADS"}}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str", "cpus_per_task": "int", "OMP_NUM_THREADS": "int"}}) + # check flops values are halved compared to previous df + assert (dfs["flops_value"] == df[df["cpus_per_task"] == 2]["flops_value"]/2).all() + + # check expected failure from scaling by incorrect type + try: + df = post_.run_post_processing(sombrero_log_path, {"title": "Title", "filters": {"and": [], "or": []}, "series": [["cpus_per_task", 1], ["cpus_per_task", 2]], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}, "scaling": {"column": {"name": "OMP_NUM_THREADS"}}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str", "cpus_per_task": "int", "OMP_NUM_THREADS": "str"}}) + except TypeError: + assert True + # check correct concatenation of two dataframes with different columns try: # get collated dataframe subset From a78b81352a9f02baa90301eb81cfe2ab0d24e26f Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Mon, 6 Nov 2023 17:30:11 +0000 Subject: [PATCH 09/35] Added preliminary functionality to scale by specific value in a given column. 
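A minimal sketch of the new `x_value` mode (hypothetical data): instead of dividing by the whole scaling column, a single value is picked out of it on the row whose x-axis entry matches, and everything is divided by that.

```python
import pandas as pd

df = pd.DataFrame({"tasks": [1, 2, 4],
                   "flops_value": [1.0, 2.0, 3.2]})

# take the scaling column's entry where the x-axis column == x_value,
# then divide the whole y column by that one number
scale = df[df["tasks"] == 2]["flops_value"].iloc[0]  # 2.0
df["flops_value"] /= scale
print(df["flops_value"].tolist())  # [0.5, 1.0, 1.6]
```
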
--- post-processing/post_processing.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index c34dcdef..47265839 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -174,10 +174,10 @@ def run_post_processing(self, log_path, config): if series_filters: for f in series_filters: m = self.row_filter(f, df) - df[mask & m] = self.transform_axis(df[mask & m], config["y_axis"]) + df[mask & m] = self.transform_axis(df[mask & m], config["y_axis"], config["x_axis"]["value"]) # apply data transformation to all data else: - df[mask] = self.transform_axis(df[mask], config["y_axis"]) + df[mask] = self.transform_axis(df[mask], config["y_axis"], config["x_axis"]["value"]) print("Selected dataframe:") print(df[columns][mask]) @@ -259,6 +259,9 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): # add labels plot.xaxis.axis_label = x_label plot.yaxis.axis_label = y_label + # remove x-axis group ticks + plot.xaxis.major_tick_line_color = None + plot.xaxis.major_label_text_font_size = "0pt" # adjust font size plot.title.text_font_size = "15pt" @@ -312,19 +315,21 @@ def row_filter(self, filter, df: pd.DataFrame): return mask - def transform_axis(self, df: pd.DataFrame, axis): + def transform_axis(self, df: pd.DataFrame, axis, x_column): """ Divide axis values by specified values and reflect this change in the dataframe. Args: df: dataframe, data to plot. axis: dict, axis column, units, and values to scale by. + x_column: string, name of column containing x-axis values. """ # FIXME: try to make this an in-place process if axis.get("scaling"): # scale by column if axis["scaling"].get("column"): + # check types if not pd.api.types.is_numeric_dtype(df[axis["value"]].dtype) or \ not pd.api.types.is_numeric_dtype(df[axis["scaling"]["column"]["name"]].dtype): @@ -333,7 +338,14 @@ def transform_axis(self, df: pd.DataFrame, axis): .format(axis["value"], df[axis["value"]].dtype, axis["scaling"]["column"]["name"], df[axis["scaling"]["column"]["name"]].dtype)) - df[axis["value"]] /= df[axis["scaling"]["column"]["name"]] + + # scale by specific value in column + if axis["scaling"]["column"].get("x_value"): + x_value = axis["scaling"]["column"]["x_value"] + df[axis["value"]] /= df[df[x_column] == x_value][axis["scaling"]["column"]["name"]].iloc[0] + # scale by entire column + else: + df[axis["value"]] /= df[axis["scaling"]["column"]["name"]] return df @@ -498,7 +510,10 @@ def get_axis_info(df: pd.DataFrame, axis): scaling = None if axis.get("scaling"): if axis.get("scaling").get("column"): - scaling = axis.get("scaling").get("column").get("name") + if axis.get("scaling").get("column").get("x_value"): + scaling = "{0} {1}".format(axis.get("scaling").get("column").get("x_value"), axis.get("scaling").get("column").get("name")) + else: + scaling = axis.get("scaling").get("column").get("name") # determine axis label label = "{0}{1}{2}".format(col_name.replace("_", " ").title(), From ca2deebab7322a7dd6e731d991fc2c11af47baf6 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Wed, 8 Nov 2023 17:19:14 +0000 Subject: [PATCH 10/35] Added ability to scale axis values by one custom value. 
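A minimal sketch of the custom-value path, including the dtype round-trip that surfaces bad input as a ValueError (column and values invented for illustration):

```python
import pandas as pd

df = pd.DataFrame({"flops_value": [3.2, 5.4]})
custom = 2  # user-supplied scaling value from the config

# interpret the value as the column's dtype; a non-numeric string
# such as "s" would raise ValueError at this point
scaling_value = pd.Series(custom, dtype=df["flops_value"].dtype).iloc[0]
df["flops_value"] /= scaling_value
print(df["flops_value"].tolist())  # [1.6, 2.7]
```
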
--- post-processing/post_processing.py | 42 ++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index 47265839..c035f6e9 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -305,7 +305,7 @@ def row_filter(self, filter, df: pd.DataFrame): value = pd.Series(value, dtype=df[column].dtype).iloc[0] mask = operator(df[column], value) except TypeError or ValueError as e: - e.args = (e.args[0] + " for column: \'{0}\' and value: \'{1}\'".format(column, value),) + e.args = (e.args[0] + " for column '{0}' and value '{1}'".format(column, value),) raise if self.debug & self.verbose: @@ -327,26 +327,39 @@ def transform_axis(self, df: pd.DataFrame, axis, x_column): # FIXME: try to make this an in-place process if axis.get("scaling"): + # scale by column if axis["scaling"].get("column"): + scaling_column = axis["scaling"]["column"]["name"] + x_value = axis["scaling"]["column"].get("x_value") + # check types if not pd.api.types.is_numeric_dtype(df[axis["value"]].dtype) or \ - not pd.api.types.is_numeric_dtype(df[axis["scaling"]["column"]["name"]].dtype): + not pd.api.types.is_numeric_dtype(df[scaling_column].dtype): # both columns must be numeric raise TypeError("Cannot scale column '{0}' of type {1} by column '{2}' of type {3}." .format(axis["value"], df[axis["value"]].dtype, - axis["scaling"]["column"]["name"], - df[axis["scaling"]["column"]["name"]].dtype)) + scaling_column, df[scaling_column].dtype)) # scale by specific value in column - if axis["scaling"]["column"].get("x_value"): - x_value = axis["scaling"]["column"]["x_value"] - df[axis["value"]] /= df[df[x_column] == x_value][axis["scaling"]["column"]["name"]].iloc[0] + if x_value: + df[axis["value"]] /= df[df[x_column] == x_value][scaling_column].iloc[0] # scale by entire column else: df[axis["value"]] /= df[axis["scaling"]["column"]["name"]] + # scale by custom value + elif axis["scaling"].get("custom"): + scaling_value = axis["scaling"]["custom"] + try: + # interpret scaling value as column dtype + scaling_value = pd.Series(scaling_value, dtype=df[axis["value"]].dtype).iloc[0] + except ValueError as e: + e.args = (e.args[0] + " as a scaling value for column '{0}'".format(axis["value"]),) + raise + df[axis["value"]] /= scaling_value + return df def read_args(): @@ -361,7 +374,7 @@ def read_args(): parser.add_argument("config_path", type=str, help="path to a configuration file specifying what to plot") # optional argument (plot type) - parser.add_argument("-p", "--plot_type", type=str, default="generic", help="type of plot to be generated (default: \'generic\')") + parser.add_argument("-p", "--plot_type", type=str, default="generic", help="type of plot to be generated (default: 'generic')") # info dump flags parser.add_argument("-d", "--debug", action="store_true", help="debug flag for printing additional information") @@ -506,14 +519,15 @@ def get_axis_info(df: pd.DataFrame, axis): raise RuntimeError("Unexpected number of axis unit entries {0}".format(unit_set)) units = next(iter(unit_set)) - # get values to scale by + # get scaling information scaling = None if axis.get("scaling"): - if axis.get("scaling").get("column"): - if axis.get("scaling").get("column").get("x_value"): - scaling = "{0} {1}".format(axis.get("scaling").get("column").get("x_value"), axis.get("scaling").get("column").get("name")) - else: - scaling = axis.get("scaling").get("column").get("name") + if 
axis["scaling"].get("column"): + scaling_column = axis["scaling"]["column"]["name"] + x_value = axis["scaling"]["column"].get("x_value") + scaling = "{0} {1}".format(x_value, scaling_column) if x_value else scaling_column + else: + scaling = str(axis["scaling"].get("custom")) # determine axis label label = "{0}{1}{2}".format(col_name.replace("_", " ").title(), From 1c47a1ab9134f09cb42a747ab3ad3e77e77a1e51 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Wed, 8 Nov 2023 17:23:55 +0000 Subject: [PATCH 11/35] Added custom value scaling unit tests. --- post-processing/test_post_processing.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/post-processing/test_post_processing.py b/post-processing/test_post_processing.py index 8d5efb93..d05a5101 100644 --- a/post-processing/test_post_processing.py +++ b/post-processing/test_post_processing.py @@ -241,17 +241,28 @@ def test_high_level_script(run_sombrero): # check returned subset is as expected assert len(df) == 4 - # check correct scaling + # check correct column scaling dfs = post_.run_post_processing(sombrero_log_path, {"title": "Title", "filters": {"and": [["cpus_per_task", "==", 2]], "or": []}, "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}, "scaling": {"column": {"name": "OMP_NUM_THREADS"}}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str", "cpus_per_task": "int", "OMP_NUM_THREADS": "int"}}) # check flops values are halved compared to previous df assert (dfs["flops_value"] == df[df["cpus_per_task"] == 2]["flops_value"]/2).all() - # check expected failure from scaling by incorrect type + # check correct custom scaling + dfs = post_.run_post_processing(sombrero_log_path, {"title": "Title", "filters": {"and": [["cpus_per_task", "==", 2]], "or": []}, "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}, "scaling": {"custom": 2}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str", "cpus_per_task": "int"}}) + # check flops values are halved compared to previous df + assert (dfs["flops_value"] == df[df["cpus_per_task"] == 2]["flops_value"]/2).all() + + # check expected failure from scaling by incorrect column type try: df = post_.run_post_processing(sombrero_log_path, {"title": "Title", "filters": {"and": [], "or": []}, "series": [["cpus_per_task", 1], ["cpus_per_task", 2]], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}, "scaling": {"column": {"name": "OMP_NUM_THREADS"}}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str", "cpus_per_task": "int", "OMP_NUM_THREADS": "str"}}) except TypeError: assert True + # check expected failure from scaling by incompatible custom type + try: + df = post_.run_post_processing(sombrero_log_path, {"title": "Title", "filters": {"and": [["cpus_per_task", "==", 2]], "or": []}, "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}, "scaling": {"custom": "s"}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str", "cpus_per_task": "int"}}) + except ValueError: + assert True + # check correct concatenation of two dataframes with different columns try: # get collated dataframe subset From 4daef87f07f1c97488582f3d769fee444e5f8925 Mon Sep 17 
00:00:00 2001 From: pineapple-cat Date: Thu, 9 Nov 2023 18:54:29 +0000 Subject: [PATCH 12/35] Added preliminary functionality to scale by a series. --- post-processing/post_processing.py | 109 +++++++++++++++++------------ 1 file changed, 66 insertions(+), 43 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index c035f6e9..dd122201 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -170,14 +170,30 @@ def run_post_processing(self, log_path, config): if num_filtered_rows > num_x_data_points: raise RuntimeError("Unexpected number of rows ({0}) does not match number of unique x-axis values per series ({1})".format(num_filtered_rows, num_x_data_points), df[columns][mask]) - # apply data transformation per series - if series_filters: - for f in series_filters: - m = self.row_filter(f, df) - df[mask & m] = self.transform_axis(df[mask & m], config["y_axis"], config["x_axis"]["value"]) - # apply data transformation to all data - else: - df[mask] = self.transform_axis(df[mask], config["y_axis"], config["x_axis"]["value"]) + scaling_column = None + scaling_series_mask = None + scaling_x_value_mask = None + # extract scaling information + if config["y_axis"].get("scaling"): + + if config["y_axis"]["scaling"].get("column"): + # copy scaling column (prevents issues when scaling by itself) + scaling_column = df[config["y_axis"]["scaling"]["column"]["name"]].copy() + # get mask of scaling series + if config["y_axis"]["scaling"]["column"].get("series") is not None: + scaling_series_mask = self.row_filter(series_filters[config["y_axis"]["scaling"]["column"]["series"]], df) + # get mask of scaling x-value + if config["y_axis"]["scaling"]["column"].get("x_value"): + scaling_x_value_mask = df[config["x_axis"]["value"]] == config["y_axis"]["scaling"]["column"]["x_value"] + + # apply data transformation per series + if series_filters: + for f in series_filters: + m = self.row_filter(f, df) + df[mask & m] = self.transform_axis(df[mask & m], mask & m, config["y_axis"], scaling_column, scaling_series_mask, scaling_x_value_mask) + # apply data transformation to all data + else: + df[mask] = self.transform_axis(df[mask], mask, config["y_axis"], scaling_column, scaling_series_mask, scaling_x_value_mask) print("Selected dataframe:") print(df[columns][mask]) @@ -315,7 +331,7 @@ def row_filter(self, filter, df: pd.DataFrame): return mask - def transform_axis(self, df: pd.DataFrame, axis, x_column): + def transform_axis(self, df: pd.DataFrame, df_mask, axis, scaling_column, scaling_series_mask, scaling_x_value_mask): """ Divide axis values by specified values and reflect this change in the dataframe. @@ -326,39 +342,43 @@ def transform_axis(self, df: pd.DataFrame, axis, x_column): """ # FIXME: try to make this an in-place process - if axis.get("scaling"): - - # scale by column - if axis["scaling"].get("column"): - - scaling_column = axis["scaling"]["column"]["name"] - x_value = axis["scaling"]["column"].get("x_value") - - # check types - if not pd.api.types.is_numeric_dtype(df[axis["value"]].dtype) or \ - not pd.api.types.is_numeric_dtype(df[scaling_column].dtype): - # both columns must be numeric - raise TypeError("Cannot scale column '{0}' of type {1} by column '{2}' of type {3}." 
- .format(axis["value"], df[axis["value"]].dtype, - scaling_column, df[scaling_column].dtype)) - - # scale by specific value in column - if x_value: - df[axis["value"]] /= df[df[x_column] == x_value][scaling_column].iloc[0] - # scale by entire column - else: - df[axis["value"]] /= df[axis["scaling"]["column"]["name"]] - - # scale by custom value - elif axis["scaling"].get("custom"): - scaling_value = axis["scaling"]["custom"] - try: - # interpret scaling value as column dtype - scaling_value = pd.Series(scaling_value, dtype=df[axis["value"]].dtype).iloc[0] - except ValueError as e: - e.args = (e.args[0] + " as a scaling value for column '{0}'".format(axis["value"]),) - raise - df[axis["value"]] /= scaling_value + + # scale by column + if scaling_column is not None: + + # check types + if not pd.api.types.is_numeric_dtype(df[axis["value"]].dtype) or \ + not pd.api.types.is_numeric_dtype(scaling_column.dtype): + # both columns must be numeric + raise TypeError("Cannot scale column '{0}' of type {1} by column '{2}' of type {3}." + .format(axis["value"], df[axis["value"]].dtype, + axis["scaling"]["column"]["name"], scaling_column.dtype)) + + # get mask of scaling value(s) + scaling_mask = df_mask.copy() + if scaling_series_mask is not None: + scaling_mask = scaling_series_mask + if scaling_x_value_mask is not None: + scaling_mask &= scaling_x_value_mask + + scaling_val = scaling_column[scaling_mask].iloc[0] if len(scaling_column[scaling_mask]) == 1 \ + else scaling_column[scaling_mask].values + + # FIXME: add a check that the masked scaling column has the same number of values + # as the masked df (unless there is only one scaling value) + + df[axis["value"]] = df[axis["value"]].values / scaling_val + + # scale by custom value + elif axis["scaling"].get("custom"): + scaling_value = axis["scaling"]["custom"] + try: + # interpret scaling value as column dtype + scaling_value = pd.Series(scaling_value, dtype=df[axis["value"]].dtype).iloc[0] + except ValueError as e: + e.args = (e.args[0] + " as a scaling value for column '{0}'".format(axis["value"]),) + raise + df[axis["value"]] /= scaling_value return df @@ -524,8 +544,11 @@ def get_axis_info(df: pd.DataFrame, axis): if axis.get("scaling"): if axis["scaling"].get("column"): scaling_column = axis["scaling"]["column"]["name"] + series_index = axis["scaling"]["column"].get("series") x_value = axis["scaling"]["column"].get("x_value") - scaling = "{0} {1}".format(x_value, scaling_column) if x_value else scaling_column + series_col = "series {0} of {1}".format(series_index, scaling_column) \ + if series_index is not None else scaling_column + scaling = "{0} {1}".format(x_value, series_col) if x_value else series_col else: scaling = str(axis["scaling"].get("custom")) From 2d6db1e0976dc1c7460c8d4f1188c9f841daee7d Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Fri, 10 Nov 2023 15:03:30 +0000 Subject: [PATCH 13/35] Minor fixes + making axis label clearer. 
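For reference, a rough sketch of the label that `get_axis_info` now assembles (inputs here are hypothetical, including the units string):

```python
col_name = "flops_value"
scaling = "1 flops_value"  # e.g. scaled by series value 1 of flops_value
units = "GFLOPs/sec"
label = "{0}{1}{2}".format(
    col_name.replace("_", " ").title(),
    " Scaled by {0}".format(scaling.replace("_", " ").title()) if scaling else "",
    " ({0})".format(units) if units else "")
print(label)  # 'Flops Value Scaled by 1 Flops Value (GFLOPs/sec)'
```
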
--- post-processing/post_processing.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index dd122201..e609dd9f 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -176,6 +176,8 @@ def run_post_processing(self, log_path, config): # extract scaling information if config["y_axis"].get("scaling"): + # FIXME: if there is a scaling field, check that there is at least one of column or custom + if config["y_axis"]["scaling"].get("column"): # copy scaling column (prevents issues when scaling by itself) scaling_column = df[config["y_axis"]["scaling"]["column"]["name"]].copy() @@ -221,8 +223,8 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): """ # get column names and labels for axes - x_column, x_label = get_axis_info(df, x_axis) - y_column, y_label = get_axis_info(df, y_axis) + x_column, x_label = get_axis_info(df, x_axis, series_filters) + y_column, y_label = get_axis_info(df, y_axis, series_filters) # find x-axis groups (series columns) groups = [x_column] @@ -336,9 +338,12 @@ def transform_axis(self, df: pd.DataFrame, df_mask, axis, scaling_column, scalin Divide axis values by specified values and reflect this change in the dataframe. Args: - df: dataframe, data to plot. + df: dataframe, data to plot (pre-masked by series, if present). + df_mask: bool list, the mask (pre-)applied to the df argument. axis: dict, axis column, units, and values to scale by. - x_column: string, name of column containing x-axis values. + scaling_column: dataframe column, copy of column containing values to scale by. + scaling_series_mask: bool list, a series mask to be applied to the scaling column. + scaling_x_value_mask: bool list, an x-axis value mask to be applied to the scaling column. """ # FIXME: try to make this an in-place process @@ -519,7 +524,7 @@ def insert_key_cols(df: pd.DataFrame, index, results): # insert keys as new columns df.insert(index, k, [r[k] if k in r.keys() else None for r in results]) -def get_axis_info(df: pd.DataFrame, axis): +def get_axis_info(df: pd.DataFrame, axis, series_filters): """ Return the column name and label for a given axis. If a column name is supplied as units information, the actual units will be extracted from a dataframe. @@ -546,7 +551,7 @@ def get_axis_info(df: pd.DataFrame, axis): scaling_column = axis["scaling"]["column"]["name"] series_index = axis["scaling"]["column"].get("series") x_value = axis["scaling"]["column"].get("x_value") - series_col = "series {0} of {1}".format(series_index, scaling_column) \ + series_col = "{0} {1}".format(series_filters[series_index][2], scaling_column) \ if series_index is not None else scaling_column scaling = "{0} {1}".format(x_value, series_col) if x_value else series_col else: From b5f24d4c27ba845eeb652b5f33c0762f92dc0834 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Fri, 10 Nov 2023 15:07:35 +0000 Subject: [PATCH 14/35] Bug fix for legend labels of plots without series. 
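Background for the fix: with a single grouping column, pandas produces scalar group keys rather than tuples, so `group[-1]` takes the last character of a string key. A minimal illustration (invented values):

```python
import pandas as pd

df = pd.DataFrame({"tasks": ["16", "32"], "flops_value": [1.0, 2.0]})
keys = [key for key, _ in df.groupby("tasks")]
print(keys)         # ['16', '32'] -- scalars, not tuples
print(keys[0][-1])  # '6' -- why group[-1] needs the len(groups) > 1 guard
```
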
--- post-processing/post_processing.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index e609dd9f..e3c03718 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -269,7 +269,9 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): index_cmap = factor_cmap(index_group_col, palette=viridis(len(colour_factors)), factors=colour_factors, start=len(groups)-1, end=len(groups)) # add legend labels to data source data_source = ColumnDataSource(grouped_df).data - legend_labels = ["{0} = {1}".format(groups[-1].replace("_", " "), group[-1]) for group in data_source[index_group_col]] + legend_labels = ["{0} = {1}".format(groups[-1].replace("_", " "), + group[-1] if len(groups) > 1 else group) + for group in data_source[index_group_col]] data_source["legend_labels"] = legend_labels # add bars From 257647507b43e4c05ec3b317ec0dddc489a81bd5 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Fri, 17 Nov 2023 15:30:16 +0000 Subject: [PATCH 15/35] Updated read_config errors. --- post-processing/post_processing.py | 39 +++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index e3c03718..a344fbb2 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -422,32 +422,49 @@ def read_config(path): # check x-axis information if not config.get("x_axis"): - raise KeyError("Missing x-axis information") + raise KeyError("Missing x-axis information.") if not config.get("x_axis").get("value"): - raise KeyError("Missing x-axis value information") + raise KeyError("Missing x-axis value information.") if not config.get("x_axis").get("units"): - raise KeyError("Missing x-axis units information") + raise KeyError("Missing x-axis units information.") + if config.get("x_axis").get("units").get("custom") is not None and \ + config.get("x_axis").get("units").get("column") is not None: + raise KeyError("Specify x-axis units information as only one of 'custom' or 'column'.") + # check y-axis information if not config.get("y_axis"): - raise KeyError("Missing y-axis information") + raise KeyError("Missing y-axis information.") if not config.get("y_axis").get("value"): - raise KeyError("Missing y-axis value information") + raise KeyError("Missing y-axis value information.") if not config.get("y_axis").get("units"): - raise KeyError("Missing y-axis units information") + raise KeyError("Missing y-axis units information.") + if config.get("y_axis").get("units").get("custom") is not None and \ + config.get("y_axis").get("units").get("column") is not None: + raise KeyError("Specify y-axis units information as only one of 'custom' or 'column'.") + + # check optional scaling information + if config.get("y_axis").get("scaling"): + if config.get("y_axis").get("scaling").get("custom") is not None and \ + config.get("y_axis").get("scaling").get("column") is not None: + raise KeyError("Specify y-axis scaling information as only one of 'custom' or 'column'.") # check series length if config.get("series") is None: - raise KeyError("Missing series information (specify an empty list [] if there is only one series)") + raise KeyError("Missing series information (specify an empty list [] if there is only one series).") if len(config["series"]) == 1: - raise KeyError("Number of series must be >= 2 (specify an empty list [] if there is only one 
series)") + raise KeyError("Number of series must be >= 2 (specify an empty list [] if there is only one series).") # check filters are present - if config.get("filters") is None: - raise KeyError("Missing filters information (specify an empty list [] if none are required)") + if not config.get("filters"): + raise KeyError("Missing filter information (specify 'and' and 'or' filters).") + if config.get("filters").get("and") is None: + raise KeyError("Missing 'and' filters (specify an empty list [] if none are required).") + if config.get("filters").get("or") is None: + raise KeyError("Missing 'or' filters (specify an empty list [] if none are required).") # check plot title information if not config.get("title"): - raise KeyError("Missing plot title information") + raise KeyError("Missing plot title information.") return config From d9fde628bc42062f235570668fdcb6fa9473b459 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Fri, 17 Nov 2023 15:45:53 +0000 Subject: [PATCH 16/35] Added check to ensure custom scaling value cannot be zero. --- post-processing/post_processing.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index a344fbb2..48dfb05b 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -176,8 +176,7 @@ def run_post_processing(self, log_path, config): # extract scaling information if config["y_axis"].get("scaling"): - # FIXME: if there is a scaling field, check that there is at least one of column or custom - + # check column information if config["y_axis"]["scaling"].get("column"): # copy scaling column (prevents issues when scaling by itself) scaling_column = df[config["y_axis"]["scaling"]["column"]["name"]].copy() @@ -188,6 +187,10 @@ def run_post_processing(self, log_path, config): if config["y_axis"]["scaling"]["column"].get("x_value"): scaling_x_value_mask = df[config["x_axis"]["value"]] == config["y_axis"]["scaling"]["column"]["x_value"] + # check custom value is not zero + elif not config["y_axis"]["scaling"].get("custom"): + raise RuntimeError("Invalid custom scaling value (cannot divide by {0}).".format(config["y_axis"]["scaling"].get("custom"))) + # apply data transformation per series if series_filters: for f in series_filters: From a1a4b9620149bed397a10f5f4310c5c5ccf5702c Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Mon, 20 Nov 2023 15:28:53 +0000 Subject: [PATCH 17/35] Added initial attempt at sorting categorical x-axis. FIXME: dataframe sorting not reflected in bokeh graph. 
--- post-processing/post_processing.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index 48dfb05b..8bc37d10 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -200,6 +200,21 @@ def run_post_processing(self, log_path, config): else: df[mask] = self.transform_axis(df[mask], mask, config["y_axis"], scaling_column, scaling_series_mask, scaling_x_value_mask) + # FIXME: sorted dataframe doesn't translate to sorted bokeh graph + if config["x_axis"].get("sorting"): + ascending = None + if config["x_axis"]["sorting"] == "ascending": + ascending = True + elif config["x_axis"]["sorting"] == "descending": + ascending = False + if ascending is not None: + # sort x values + df.sort_values([config["x_axis"]["value"]], ascending=ascending, inplace=True, ignore_index=True) + # NOTE: currently assuming there can only be one series column + if series_columns: + # sort series column + df.sort_values(series_columns[0], ascending=ascending, inplace=True, ignore_index=True) + print("Selected dataframe:") print(df[columns][mask]) @@ -240,7 +255,7 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): # combine group names for later plotting with groupby index_group_col = "_".join(groups) # group by group names (or just x-axis if no other groups are present) - grouped_df = df.groupby(x_column) if len(groups) == 1 else df.groupby(groups) + grouped_df = df.groupby(x_column, sort=False) if len(groups) == 1 else df.groupby(groups, sort=False) if self.debug: print("") From 556dca2fd312fe34f401f9b32953a742717d6e68 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Thu, 30 Nov 2023 21:38:14 +0000 Subject: [PATCH 18/35] Added more data transform unit tests. 
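The series-scaling assertions below reduce to element-wise division of each series by the scaling series. A compact sketch with invented numbers (the scaling series is copied first, as the implementation does, so scaling a series by itself is safe):

```python
import pandas as pd

df = pd.DataFrame({"tasks": [1, 2, 1, 2],
                   "cpus_per_task": [1, 1, 2, 2],
                   "flops_value": [1.0, 2.0, 1.5, 4.0]})

# scale both series by the cpus_per_task == 1 series (series index 0);
# boolean indexing already yields a copy of the scaling values
base = df[df["cpus_per_task"] == 1]["flops_value"].values
for cpus in (1, 2):
    m = df["cpus_per_task"] == cpus
    df.loc[m, "flops_value"] = df.loc[m, "flops_value"].values / base

print(df["flops_value"].tolist())  # [1.0, 1.0, 1.5, 2.0]
```
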
--- post-processing/test_post_processing.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/post-processing/test_post_processing.py b/post-processing/test_post_processing.py index d05a5101..a4a8c7b3 100644 --- a/post-processing/test_post_processing.py +++ b/post-processing/test_post_processing.py @@ -237,14 +237,28 @@ def test_high_level_script(run_sombrero): assert len(df) == 2 # check correct or filtering - df = post_.run_post_processing(sombrero_log_path, {"title": "Title", "filters": {"and": [], "or": [["tasks", ">", "1"], ["tasks", "<", "2"]]}, "series": [["cpus_per_task", "1"], ["cpus_per_task", "2"]], "x_axis": {"value": "cpus_per_task", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"tasks": "int", "cpus_per_task": "int", "flops_value": "float", "flops_unit": "str"}}) + df = post_.run_post_processing(sombrero_log_path, {"title": "Title", "filters": {"and": [], "or": [["tasks", ">", "1"], ["tasks", "<", "2"]]}, "series": [["cpus_per_task", "1"], ["cpus_per_task", "2"]], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}}, "column_types": {"tasks": "int", "cpus_per_task": "int", "flops_value": "float", "flops_unit": "str"}}) # check returned subset is as expected assert len(df) == 4 # check correct column scaling dfs = post_.run_post_processing(sombrero_log_path, {"title": "Title", "filters": {"and": [["cpus_per_task", "==", 2]], "or": []}, "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}, "scaling": {"column": {"name": "OMP_NUM_THREADS"}}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str", "cpus_per_task": "int", "OMP_NUM_THREADS": "int"}}) # check flops values are halved compared to previous df - assert (dfs["flops_value"] == df[df["cpus_per_task"] == 2]["flops_value"]/2).all() + assert (dfs["flops_value"].values == df[df["cpus_per_task"] == 2]["flops_value"].values/2).all() + + # check correct column + series scaling + dfs = post_.run_post_processing(sombrero_log_path, {"title": "Title", "filters": {"and": [], "or": []}, "series": [["cpus_per_task", 1], ["cpus_per_task", 2]], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}, "scaling": {"column": {"name": "flops_value", "series": 0}}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str", "cpus_per_task": "int"}}) + assert (dfs[dfs["cpus_per_task"] == 1]["flops_value"].values == + df[df["cpus_per_task"] == 1]["flops_value"].values / + df[df["cpus_per_task"] == 1]["flops_value"].values).all() + assert (dfs[dfs["cpus_per_task"] == 2]["flops_value"].values == + df[df["cpus_per_task"] == 2]["flops_value"].values / + df[df["cpus_per_task"] == 1]["flops_value"].values).all() + + # check correct column + series + x value scaling + dfs = post_.run_post_processing(sombrero_log_path, {"title": "Title", "filters": {"and": [], "or": []}, "series": [["cpus_per_task", 1], ["cpus_per_task", 2]], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}, "scaling": {"column": {"name": "flops_value", "series": 0, "x_value": 2}}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str", "cpus_per_task": "int"}}) + assert (dfs["flops_value"].values == 
df["flops_value"].values / + df[(df["cpus_per_task"] == 1) & (df["tasks"] == 2)]["flops_value"].iloc[0]).all() # check correct custom scaling dfs = post_.run_post_processing(sombrero_log_path, {"title": "Title", "filters": {"and": [["cpus_per_task", "==", 2]], "or": []}, "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}, "scaling": {"custom": 2}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str", "cpus_per_task": "int"}}) From 608fd0726f6f46d2c46a421a17d85cb561edbf74 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Fri, 1 Dec 2023 10:35:13 +0000 Subject: [PATCH 19/35] Updated documentation to explain scaling and possible data transformations. --- post-processing/README.md | 117 +++++++++++++++++++- post-processing/post_processing_config.yaml | 7 ++ 2 files changed, 121 insertions(+), 3 deletions(-) diff --git a/post-processing/README.md b/post-processing/README.md index 83b0f66c..f20b5ad5 100644 --- a/post-processing/README.md +++ b/post-processing/README.md @@ -4,14 +4,16 @@ The post-processing scripts provided with the ExCALIBUR tests package are intended to grant users a quick starting point for visualising benchmark results with basic graphs and tables. Their components can also be used inside custom users' scripts. -There are three main post-processing components: +There are four main post-processing components: - **`Perflog parsing`:** - Data from benchmark performance logs are stored in a pandas DataFrame. - **`Data filtering`:** - If more than one perflog is used for plotting, DataFrames from individual perflogs are concatenated together into one DataFrame. - The DataFrame is then filtered, keeping only relevant rows and columns. +- **`Data transformation`:** + - Axis value columns in the DataFrame are scaled according to user specifications. - **`Plotting`:** - - A filtered DataFrame is passed to a plotting script, which produces a graph and embeds it in a simple HTML file. + - A filtered and transformed DataFrame is passed to a plotting script, which produces a graph and embeds it in a simple HTML file. - Users may run the plotting script to generate a generic bar chart. Graph settings should be specified in a configuration YAML file. ### Installation @@ -34,12 +36,13 @@ Run `post_processing.py -h` for more information (including debugging flags). ### Configuration Structure -Before running post-processing, create a config file including all necessary information for graph generation (you must specify at least plot title, x-axis, y-axis, and column types). See below for an example. +Before running post-processing, create a config file including all necessary information for graph generation (you must specify at least plot title, x-axis, y-axis, and column types). See below for an example and some clarifying notes. - `title` - Plot title. - `x_axis`, `y_axis` - Axis information. - `value` - Axis data points. Specified with a column name. - `units` - Axis units. Specified either with a column name or a custom label (may be null). + - `scaling` - (Optional.) Scale axis values by either a column or a custom value. - `filters` - (Optional.) Filter data rows based on specified conditions. (Specify an empty list if no filters are required.) - `and` - Filter mask is determined from a logical AND of conditions in list. - `or` - Filter mask is determined from a logical OR of conditions in list. 
@@ -64,6 +67,11 @@ y_axis: value: "y_axis_col" units: column: "unit_col" + scaling: + column: + name: "scaling_col" + series: 0 + x_value: "x_val_s" filters: and: [["filter_col_1", "<=", filter_val_1], ["filter_col_2", "!=", filter_val_2]] @@ -75,6 +83,7 @@ column_types: x_axis_col: "str" y_axis_col: "float" unit_col: "str" + scaling_col: "float" filter_col_1: "datetime" filter_col_2: "int" series_col: "str" @@ -89,6 +98,108 @@ The settings above will produce a graph that will have its x-axis data grouped b - (`x_val_2`, `series_val_1`) - (`x_val_2`, `series_val_2`) +#### A Note on Scaling + +When axis values are scaled, they are all divided by a number or a list of numbers. If using more than one number for scaling, the length of the list must match the length of the axis column being scaled. (`Note: scaling is currently only supported for y-axis data, as graphs with a non-categorical x-axis are still a work in progress.`) + +**Custom Scaling** + +Manually specify one value to scale axis values by. + +```yaml +y_axis: + value: "y_axis_col" + units: + column: "unit_col" + scaling: + custom: 2 +``` + +In the snippet above, all y-axis values are to be divided by 2. + +|y_axis_col|scaled_y_axis_col| +|-|-| +|3.2|3.2 / 2.0 = 1.6| +|5.4|5.4 / 2.0 = 2.7| +|2.4|2.4 / 2.0 = 1.2| +|5.0|5.0 / 2.0 = 2.5| + +**Column Scaling** + +Specify one column to scale axis values by. + +```yaml +y_axis: + value: "y_axis_col" + units: + column: "unit_col" + scaling: + column: + name: "scaling_col" +``` + +In the snippet above, all y-axis values are to be divided by the corresponding values in the scaling column. + +|y_axis_col|scaling_col|scaled_y_axis_col| +|-|-|-| +|3.2|**`1.6`**|3.2 / 1.6 = 2.0| +|5.4|**`2.0`**|5.4 / 2.0 = 2.7| +|2.4|**`0.6`**|2.4 / 0.6 = 4.0| +|5.0|**`2.5`**|5.0 / 2.5 = 2.0| + +**Series Scaling** + +Specify one series to scale axis values by. This is done with an index, which is used to find the correct series from a list. + +In the case of the list of series from the example config above, index 0 would select a scaling series of `["series_col", "series_val_1"]`, while index 1 would scale by `["series_col", "series_val_2"]`. + +```yaml +y_axis: + value: "y_axis_col" + units: + column: "unit_col" + scaling: + column: + name: "scaling_col" + series: 0 +``` + +In the snippet above, all y-axis values are to be split by series and divided by the corresponding values in the scaling series. + +|y_axis_col|scaling_col|series_col|scaled_y_axis_col| +|-|-|-|-| +|3.2|**`1.6`**|`series_val_1`|3.2 / 1.6 = 2.0| +|5.4|**`2.0`**|`series_val_1`|5.4 / 2.0 = 2.7| +|2.4|0.6|series_val_2|2.4 / 1.6 = 1.5| +|5.0|2.5|series_val_2|5.0 / 2.0 = 2.5| + +**Selected Value Scaling** + +Specify one value from a column to scale axis values by. + +```yaml +y_axis: + value: "y_axis_col" + units: + column: "unit_col" + scaling: + column: + name: "scaling_col" + series: 0 + x_value: "x_val_s" +``` + +In the snippet above, all y-axis values are to be divided by the scaling value found by filtering the scaling column by both series and x-axis value. 
+ +|x_axis_col|y_axis_col|scaling_col|series_col|scaled_y_axis_col| +|-|-|-|-|-| +|x_val_1|3.2|1.6|series_val_1|3.2 / 2.0 = 1.6| +|`x_val_s`|5.4|**`2.0`**|`series_val_1`|5.4 / 2.0 = 2.7| +|x_val_2|2.4|0.7|series_val_2|2.4 / 2.0 = 1.2| +|x_val_s|5.0|2.5|series_val_2|5.0 / 2.0 = 2.5| + +(`Note: if series are not present and x-axis values are all unique, it is enough to specify just the column name and x-value.`) + #### A Note on Filters AND filters and OR filters are combined with a logical AND to produce the final filter mask applied to the DataFrame prior to graphing. For example: diff --git a/post-processing/post_processing_config.yaml b/post-processing/post_processing_config.yaml index f24e08a1..2e4b3521 100644 --- a/post-processing/post_processing_config.yaml +++ b/post-processing/post_processing_config.yaml @@ -6,6 +6,8 @@ title: Example Plot # and "display_name" fields, if it exists). # Units can either be specified with a perflog column name # or a custom label (including null (i.e. None) if units are N/A). +# Optional axis scaling can either be specified with a perflog column name +# (+ a series index and/or an x-axis value for filtering), or a custom value x_axis: value: "tasks" @@ -16,6 +18,11 @@ y_axis: value: "flops_value" units: column: "flops_unit" + scaling: + column: + name: "flops_value" + series: 0 # The series at index 0 is ["cpus_per_task", 1] + x_value: 2 # Find the row where "tasks" is 2 # Optional row filters (specify an empty list [] if none are required). # Filter format: [column_name, operator, value] From b0ec2515e06a76866baf00b0421adf7f9f0adb5f Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Fri, 1 Dec 2023 11:22:58 +0000 Subject: [PATCH 20/35] Making use of titlecase library in graph labels to preserve acronyms. --- post-processing/post_processing.py | 8 +++++--- pyproject.toml | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index 8bc37d10..bff9cde7 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -18,6 +18,7 @@ from bokeh.palettes import viridis from bokeh.plotting import figure, output_file, save from bokeh.transform import factor_cmap +from titlecase import titlecase class PostProcessing: @@ -588,15 +589,16 @@ def get_axis_info(df: pd.DataFrame, axis, series_filters): scaling_column = axis["scaling"]["column"]["name"] series_index = axis["scaling"]["column"].get("series") x_value = axis["scaling"]["column"].get("x_value") - series_col = "{0} {1}".format(series_filters[series_index][2], scaling_column) \ + # FIXME: make scaling label more clear + series_col = "{0} in {1}".format(series_filters[series_index][2], scaling_column) \ if series_index is not None else scaling_column scaling = "{0} {1}".format(x_value, series_col) if x_value else series_col else: scaling = str(axis["scaling"].get("custom")) # determine axis label - label = "{0}{1}{2}".format(col_name.replace("_", " ").title(), - " Scaled by {0}".format(scaling.replace("_", " ").title()) if scaling else "", + label = "{0}{1}{2}".format(titlecase(col_name.replace("_", " ")), + titlecase(" Scaled by {0}".format(scaling.replace("_", " "))) if scaling else "", " ({0})".format(units) if units else "") return col_name, label diff --git a/pyproject.toml b/pyproject.toml index d82eb9ec..7360d45b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ test = [ post-processing = [ "pandas >= 2.0.1", "bokeh >= 3.2.0", + "titlecase >= 2.4.1", ] [tool.setuptools_scm] 
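To see why the patch above swaps `str.title()` for the `titlecase` library when building axis labels (a sketch assuming a label assembled from the example config's column names; not code from the patch):

```python
# Sketch: str.title() capitalises every word and mangles acronyms,
# while titlecase leaves all-caps words intact.
from titlecase import titlecase

label = "flops_value scaled by OMP_NUM_THREADS".replace("_", " ")
print(label.title())     # Flops Value Scaled By Omp Num Threads  (acronym lost)
print(titlecase(label))  # Flops Value Scaled by OMP NUM THREADS  (acronym kept)
```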
From c9ae3f29acf4ba4a2367f7fb0873a0fd8cf8d59c Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Fri, 1 Dec 2023 13:10:10 +0000 Subject: [PATCH 21/35] Fixed simple categorical x-axis sorting. --- post-processing/post_processing.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index bff9cde7..a8553d40 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -201,20 +201,13 @@ def run_post_processing(self, log_path, config): else: df[mask] = self.transform_axis(df[mask], mask, config["y_axis"], scaling_column, scaling_series_mask, scaling_x_value_mask) - # FIXME: sorted dataframe doesn't translate to sorted bokeh graph - if config["x_axis"].get("sorting"): - ascending = None - if config["x_axis"]["sorting"] == "ascending": - ascending = True - elif config["x_axis"]["sorting"] == "descending": - ascending = False - if ascending is not None: - # sort x values - df.sort_values([config["x_axis"]["value"]], ascending=ascending, inplace=True, ignore_index=True) - # NOTE: currently assuming there can only be one series column - if series_columns: - # sort series column - df.sort_values(series_columns[0], ascending=ascending, inplace=True, ignore_index=True) + # sort series in ascending order + # NOTE: currently assuming there can only be one series column + if series_columns: + # NOTE: don't use ignore_index=True, this results in unexpected behaviour + df.sort_values(series_columns[0], inplace=True) + # reset index after sorting + df.index = range(len(df.index)) print("Selected dataframe:") print(df[columns][mask]) @@ -280,6 +273,11 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): + ("{%0.2f}" if pd.api.types.is_float_dtype(df[y_column].dtype) else ""))], formatters={"@{0}_mean".format(y_column) : "printf"})) + # sort x-axis values in ascending order (otherwise default sort is descending) + if x_axis.get("sort"): + if x_axis["sort"] == "ascending": + plot.x_range.factors = sorted(plot.x_range.factors, key=lambda x: x[0], reverse=True) + # create legend outside plot plot.add_layout(Legend(), "right") # automatically base bar colouring on last group column From b2d7ad9e574a2790e30ea80243060a33f677ac72 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Fri, 1 Dec 2023 13:16:30 +0000 Subject: [PATCH 22/35] Added note on sorting categorical x-axis. --- post-processing/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/post-processing/README.md b/post-processing/README.md index f20b5ad5..d288cb93 100644 --- a/post-processing/README.md +++ b/post-processing/README.md @@ -43,6 +43,7 @@ Before running post-processing, create a config file including all necessary inf - `value` - Axis data points. Specified with a column name. - `units` - Axis units. Specified either with a column name or a custom label (may be null). - `scaling` - (Optional.) Scale axis values by either a column or a custom value. + - `sort` - (Optional.) Sort categorical x-axis in ascending order (otherwise values are sorted in descending order by default). - `filters` - (Optional.) Filter data rows based on specified conditions. (Specify an empty list if no filters are required.) - `and` - Filter mask is determined from a logical AND of conditions in list. - `or` - Filter mask is determined from a logical OR of conditions in list. 
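The sorting fix in the patch above hinges on one detail worth spelling out (a sketch, not code from the patch): bokeh treats categorical factors as strings, so sorting them directly orders numeric values lexicographically.

```python
# Sketch: lexicographic vs numeric ordering of stringified factors.
factors = ["16", "2", "4"]       # x-axis values after casting to str
print(sorted(factors))           # ['16', '2', '4'] -- wrong for numbers
print(sorted(factors, key=int))  # ['2', '4', '16'] -- numeric order
```

The patches that follow generalise the `key=int` idea by round-tripping each factor through `pd.Series(x, dtype=x_col_dtype)`, which works for any stored column dtype.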
@@ -62,6 +63,7 @@ x_axis: value: "x_axis_col" units: custom: "unit_label" + sort: "ascending" y_axis: value: "y_axis_col" From 562f4aac2e47207ce30ddff9dd1f9f5cc29525ad Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Fri, 1 Dec 2023 13:30:48 +0000 Subject: [PATCH 23/35] Fixed stray missing detail in unit test. --- post-processing/test_post_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/post-processing/test_post_processing.py b/post-processing/test_post_processing.py index a4a8c7b3..7638388c 100644 --- a/post-processing/test_post_processing.py +++ b/post-processing/test_post_processing.py @@ -263,7 +263,7 @@ def test_high_level_script(run_sombrero): # check correct custom scaling dfs = post_.run_post_processing(sombrero_log_path, {"title": "Title", "filters": {"and": [["cpus_per_task", "==", 2]], "or": []}, "series": [], "x_axis": {"value": "tasks", "units": {"custom": None}}, "y_axis": {"value": "flops_value", "units": {"column": "flops_unit"}, "scaling": {"custom": 2}}, "column_types": {"tasks": "int", "flops_value": "float", "flops_unit": "str", "cpus_per_task": "int"}}) # check flops values are halved compared to previous df - assert (dfs["flops_value"] == df[df["cpus_per_task"] == 2]["flops_value"]/2).all() + assert (dfs["flops_value"].values == df[df["cpus_per_task"] == 2]["flops_value"].values/2).all() # check expected failure from scaling by incorrect column type try: From 4ecbb82fd13d0f64425bd1902422355cc6bab064 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Mon, 4 Dec 2023 15:05:27 +0000 Subject: [PATCH 24/35] Updated dataframe sorting and fixed scaling mismatch by sorting before scaling. --- post-processing/post_processing.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index a8553d40..e45df0ea 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -171,6 +171,14 @@ def run_post_processing(self, log_path, config): if num_filtered_rows > num_x_data_points: raise RuntimeError("Unexpected number of rows ({0}) does not match number of unique x-axis values per series ({1})".format(num_filtered_rows, num_x_data_points), df[columns][mask]) + sorting_columns = [config["x_axis"]["value"]] + # sort x-axis values and series in ascending order + if series_columns: + # NOTE: currently assuming there can only be one series column + sorting_columns.append(series_columns[0]) + # sorting here is necessary to ensure correct scaling alignment + df.sort_values(sorting_columns, inplace=True, ignore_index=True) + scaling_column = None scaling_series_mask = None scaling_x_value_mask = None @@ -201,14 +209,6 @@ def run_post_processing(self, log_path, config): else: df[mask] = self.transform_axis(df[mask], mask, config["y_axis"], scaling_column, scaling_series_mask, scaling_x_value_mask) - # sort series in ascending order - # NOTE: currently assuming there can only be one series column - if series_columns: - # NOTE: don't use ignore_index=True, this results in unexpected behaviour - df.sort_values(series_columns[0], inplace=True) - # reset index after sorting - df.index = range(len(df.index)) - print("Selected dataframe:") print(df[columns][mask]) @@ -386,7 +386,7 @@ def transform_axis(self, df: pd.DataFrame, df_mask, axis, scaling_column, scalin scaling_mask &= scaling_x_value_mask scaling_val = scaling_column[scaling_mask].iloc[0] if len(scaling_column[scaling_mask]) == 1 \ - else scaling_column[scaling_mask].values + 
else scaling_column[scaling_mask].values # FIXME: add a check that the masked scaling column has the same number of values # as the masked df (unless there is only one scaling value) From e10e5be425df19119958e33a3cff498a2aa7efba Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Mon, 4 Dec 2023 15:09:50 +0000 Subject: [PATCH 25/35] Updated x-axis sorting to work as expected for non-string values. --- post-processing/post_processing.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index e45df0ea..770e50f9 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -243,6 +243,8 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): for f in series_filters: if f[0] not in groups: groups.append(f[0]) + # keep original x-axis dtype for sorting + x_col_dtype = df[x_column].dtype # all x-axis data treated as categorical for g in groups: df[g] = df[g].astype(str) @@ -274,9 +276,13 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): formatters={"@{0}_mean".format(y_column) : "printf"})) # sort x-axis values in ascending order (otherwise default sort is descending) + reverse = False if x_axis.get("sort"): if x_axis["sort"] == "ascending": - plot.x_range.factors = sorted(plot.x_range.factors, key=lambda x: x[0], reverse=True) + reverse = True + plot.x_range.factors = sorted(plot.x_range.factors, + key=lambda x: pd.Series(x[0], dtype=x_col_dtype).iloc[0], + reverse=reverse) # create legend outside plot plot.add_layout(Legend(), "right") From 5073413f2979304fb4139853751afd9969eedfb0 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Mon, 4 Dec 2023 15:36:04 +0000 Subject: [PATCH 26/35] Fixed sorting for graphs without series. --- post-processing/post_processing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index 770e50f9..ceee66b9 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -280,9 +280,9 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): if x_axis.get("sort"): if x_axis["sort"] == "ascending": reverse = True - plot.x_range.factors = sorted(plot.x_range.factors, - key=lambda x: pd.Series(x[0], dtype=x_col_dtype).iloc[0], - reverse=reverse) + plot.x_range.factors = sorted(plot.x_range.factors, reverse=reverse, + key=lambda x: pd.Series(x[0] if len(groups) > 1 else x, + dtype=x_col_dtype).iloc[0]) # create legend outside plot plot.add_layout(Legend(), "right") From 385e1553dba68286b1a40a649af7a92174180f5a Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Tue, 5 Dec 2023 16:49:15 +0000 Subject: [PATCH 27/35] Moved sorting to not interfere with filter mask. 
--- post-processing/post_processing.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index ceee66b9..4b960428 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -147,6 +147,14 @@ def run_post_processing(self, log_path, config): else: raise KeyError("Could not find user-specified type for column", col) + sorting_columns = [config["x_axis"]["value"]] + # sort x-axis values and series in ascending order + if series_columns: + # NOTE: currently assuming there can only be one series column + sorting_columns.append(series_columns[0]) + # sorting here is necessary to ensure correct filtering + scaling alignment + df.sort_values(sorting_columns, inplace=True, ignore_index=True) + mask = pd.Series(df.index.notnull()) # filter rows if and_filters: @@ -171,14 +179,6 @@ def run_post_processing(self, log_path, config): if num_filtered_rows > num_x_data_points: raise RuntimeError("Unexpected number of rows ({0}) does not match number of unique x-axis values per series ({1})".format(num_filtered_rows, num_x_data_points), df[columns][mask]) - sorting_columns = [config["x_axis"]["value"]] - # sort x-axis values and series in ascending order - if series_columns: - # NOTE: currently assuming there can only be one series column - sorting_columns.append(series_columns[0]) - # sorting here is necessary to ensure correct scaling alignment - df.sort_values(sorting_columns, inplace=True, ignore_index=True) - scaling_column = None scaling_series_mask = None scaling_x_value_mask = None @@ -582,8 +582,8 @@ def get_axis_info(df: pd.DataFrame, axis, series_filters): if axis.get("units").get("column"): unit_set = set(df[axis["units"]["column"]].dropna()) # check all rows have the same units - if len(unit_set) != 1: - raise RuntimeError("Unexpected number of axis unit entries {0}".format(unit_set)) + #if len(unit_set) != 1: + # raise RuntimeError("Unexpected number of axis unit entries {0}".format(unit_set)) units = next(iter(unit_set)) # get scaling information From e9521084c13905dfd133c8a0eb502029a7f7521f Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Tue, 5 Dec 2023 16:55:23 +0000 Subject: [PATCH 28/35] Changed default categorical x-axis sort from descending to ascending. --- post-processing/README.md | 4 ++-- post-processing/post_processing.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/post-processing/README.md b/post-processing/README.md index d288cb93..3a9cee50 100644 --- a/post-processing/README.md +++ b/post-processing/README.md @@ -43,7 +43,7 @@ Before running post-processing, create a config file including all necessary inf - `value` - Axis data points. Specified with a column name. - `units` - Axis units. Specified either with a column name or a custom label (may be null). - `scaling` - (Optional.) Scale axis values by either a column or a custom value. - - `sort` - (Optional.) Sort categorical x-axis in ascending order (otherwise values are sorted in descending order by default). + - `sort` - (Optional.) Sort categorical x-axis in descending order (otherwise values are sorted in ascending order by default). - `filters` - (Optional.) Filter data rows based on specified conditions. (Specify an empty list if no filters are required.) - `and` - Filter mask is determined from a logical AND of conditions in list. - `or` - Filter mask is determined from a logical OR of conditions in list. 
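The reason PATCH 27 above moves `sort_values` ahead of mask construction can be shown in a few lines (a sketch, not code from the patch): a boolean mask built before an index-resetting sort silently re-targets different rows.

```python
# Sketch: sorting with ignore_index=True invalidates a pre-built mask.
import pandas as pd

df = pd.DataFrame({"tasks": [2, 1], "flops_value": [20.0, 10.0]})
mask = df["tasks"] == 2                          # intended: the tasks == 2 row
df = df.sort_values("tasks", ignore_index=True)  # rows swap, index is reset
print(df[mask])                                  # now selects the tasks == 1 row
```

Sorting first means every mask that follows is built against the final row order.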
@@ -63,7 +63,7 @@ x_axis: value: "x_axis_col" units: custom: "unit_label" - sort: "ascending" + sort: "descending" y_axis: value: "y_axis_col" diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index 4b960428..1cc13f6e 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -275,11 +275,11 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): + ("{%0.2f}" if pd.api.types.is_float_dtype(df[y_column].dtype) else ""))], formatters={"@{0}_mean".format(y_column) : "printf"})) - # sort x-axis values in ascending order (otherwise default sort is descending) - reverse = False + # sort x-axis values in descending order (otherwise default sort is ascending) + reverse = True if x_axis.get("sort"): - if x_axis["sort"] == "ascending": - reverse = True + if x_axis["sort"] == "descending": + reverse = False plot.x_range.factors = sorted(plot.x_range.factors, reverse=reverse, key=lambda x: pd.Series(x[0] if len(groups) > 1 else x, dtype=x_col_dtype).iloc[0]) From 697d4766afba9c11e9129459fc1a44f92bac29e2 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Tue, 5 Dec 2023 17:04:51 +0000 Subject: [PATCH 29/35] Adjusted graph colour sorting. --- post-processing/post_processing.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index 1cc13f6e..dbe37990 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -245,6 +245,7 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): groups.append(f[0]) # keep original x-axis dtype for sorting x_col_dtype = df[x_column].dtype + last_group_dtype = df[groups[-1]].dtype # all x-axis data treated as categorical for g in groups: df[g] = df[g].astype(str) @@ -287,11 +288,13 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): # create legend outside plot plot.add_layout(Legend(), "right") # automatically base bar colouring on last group column - colour_factors = sorted(df[groups[-1]].unique()) + colour_factors = [str(x) for x in sorted(pd.Series(df[groups[-1]].unique(), + dtype=last_group_dtype))] # divide and assign colours index_cmap = factor_cmap(index_group_col, palette=viridis(len(colour_factors)), factors=colour_factors, start=len(groups)-1, end=len(groups)) # add legend labels to data source data_source = ColumnDataSource(grouped_df).data + # FIXME: attempt to adjust legend label sorting to match new colouring legend_labels = ["{0} = {1}".format(groups[-1].replace("_", " "), group[-1] if len(groups) > 1 else group) for group in data_source[index_group_col]] From 32d4334be392812d0b97b7c8181daf8cdff968c0 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Fri, 8 Dec 2023 14:39:55 +0000 Subject: [PATCH 30/35] Adjusted legend label sorting + fixed default data sorting order. 
--- post-processing/post_processing.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index dbe37990..908f81a6 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -277,16 +277,14 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): formatters={"@{0}_mean".format(y_column) : "printf"})) # sort x-axis values in descending order (otherwise default sort is ascending) - reverse = True + reverse = False if x_axis.get("sort"): if x_axis["sort"] == "descending": - reverse = False + reverse = True plot.x_range.factors = sorted(plot.x_range.factors, reverse=reverse, key=lambda x: pd.Series(x[0] if len(groups) > 1 else x, dtype=x_col_dtype).iloc[0]) - # create legend outside plot - plot.add_layout(Legend(), "right") # automatically base bar colouring on last group column colour_factors = [str(x) for x in sorted(pd.Series(df[groups[-1]].unique(), dtype=last_group_dtype))] @@ -294,14 +292,15 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): index_cmap = factor_cmap(index_group_col, palette=viridis(len(colour_factors)), factors=colour_factors, start=len(groups)-1, end=len(groups)) # add legend labels to data source data_source = ColumnDataSource(grouped_df).data - # FIXME: attempt to adjust legend label sorting to match new colouring legend_labels = ["{0} = {1}".format(groups[-1].replace("_", " "), group[-1] if len(groups) > 1 else group) for group in data_source[index_group_col]] data_source["legend_labels"] = legend_labels + # create legend outside plot + plot.add_layout(Legend(), "right") # add bars - plot.vbar(x=index_group_col, top="{0}_mean".format(y_column), width=0.9, source=data_source, line_color="white", fill_color=index_cmap, legend_field="legend_labels", hover_alpha=0.9) + plot.vbar(x=index_group_col, top="{0}_mean".format(y_column), width=0.9, source=data_source, line_color="white", fill_color=index_cmap, legend_group="legend_labels", hover_alpha=0.9) # add labels plot.xaxis.axis_label = x_label plot.yaxis.axis_label = y_label @@ -311,6 +310,14 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): # adjust font size plot.title.text_font_size = "15pt" + # get label values with their original dtype + label_values = [pd.Series(x.label.value.split("=")[1].strip(), dtype=last_group_dtype).iloc[0] + for x in plot.legend[0].items] + # sort legend items (order determined by x-axis sort) + sorted_legend_items = [x[1] for x in sorted(zip(label_values, plot.legend[0].items), + reverse=reverse, key=lambda x: x[0])] + plot.legend[0].items = sorted_legend_items + # save to file save(plot) From 0a653758715757ce42fe58c99728cdb7c98dab23 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Fri, 8 Dec 2023 15:15:28 +0000 Subject: [PATCH 31/35] Style fixes (trimming long lines) + restored accidentally removed units check. --- post-processing/post_processing.py | 119 +++++++++++++++++++---------- 1 file changed, 80 insertions(+), 39 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index 908f81a6..85fd2cc1 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -28,7 +28,8 @@ def __init__(self, debug=False, verbose=False): def run_post_processing(self, log_path, config): """ - Return a dataframe containing the information passed to a plotting script and produce relevant graphs. 
+ Return a dataframe containing the information passed to a plotting script + and produce relevant graphs. Args: log_path: str, path to a log file or a directory containing log files. @@ -42,12 +43,14 @@ def run_post_processing(self, log_path, config): raise RuntimeError("Perflog file name provided should have a .log extension.") log_files = [log_path] elif os.path.isdir(log_path): - log_files_temp = [os.path.join(root, file) for root, _, files in os.walk(log_path) for file in files] + log_files_temp = [os.path.join(root, file) for root, _, files in os.walk(log_path) + for file in files] for file in log_files_temp: if os.path.splitext(file)[1] == ".log": log_files.append(file) if len(log_files) == 0: - raise RuntimeError("No perflogs found in this path. Perflogs should have a .log extension.") + raise RuntimeError( + "No perflogs found in this path. Perflogs should have a .log extension.") else: raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), log_path) @@ -65,7 +68,8 @@ def run_post_processing(self, log_path, config): df = pd.concat([df, temp], ignore_index=True) except KeyError as e: if self.debug: - print("Discarding %s:" %os.path.basename(file), type(e).__name__ + ":", e.args[0], e.args[1]) + print("Discarding %s:" %os.path.basename(file), + type(e).__name__ + ":", e.args[0], e.args[1]) print("") if df.empty: raise FileNotFoundError(errno.ENOENT, "Could not find a valid perflog in path", log_path) @@ -83,7 +87,8 @@ def run_post_processing(self, log_path, config): series_filters = [[s[0], "==", s[1]] for s in series] # check acceptable number of series if len(set(series_columns)) > 1: - raise RuntimeError("Currently supporting grouping of series by only one column. Please use a single column name in your series configuration.") + raise RuntimeError("Currently supporting grouping of series by only one column. \ + Please use a single column name in your series configuration.") # add series columns to dataframe column list for c in series_columns: if c not in columns: @@ -130,13 +135,14 @@ def run_post_processing(self, log_path, config): conversion_type = "float64" elif pd.api.types.is_integer_dtype(conversion_type): # all integers treated as Int64 (nullable) - # note: default pandas integer type is int64 (not nullable) + # NOTE: default pandas integer type is int64 (not nullable) conversion_type = "Int64" elif pd.api.types.is_datetime64_any_dtype(conversion_type): # all datetimes treated as datetime64[ns] (nullable) conversion_type = "datetime64[ns]" else: - raise RuntimeError("Unsupported user-specified type '{0}' for column '{1}'.".format(conversion_type, col)) + raise RuntimeError("Unsupported user-specified type '{0}' for column '{1}'." 
\ + .format(conversion_type, col)) # skip type conversion if column is already the desired type if conversion_type == df[col].dtype: @@ -177,7 +183,9 @@ def run_post_processing(self, log_path, config): num_x_data_points = series_combinations * len(set(df[config["x_axis"]["value"]][mask])) # check expected number of rows if num_filtered_rows > num_x_data_points: - raise RuntimeError("Unexpected number of rows ({0}) does not match number of unique x-axis values per series ({1})".format(num_filtered_rows, num_x_data_points), df[columns][mask]) + raise RuntimeError("Unexpected number of rows ({0}) does not match \ + number of unique x-axis values per series ({1})" \ + .format(num_filtered_rows, num_x_data_points), df[columns][mask]) scaling_column = None scaling_series_mask = None @@ -191,29 +199,37 @@ def run_post_processing(self, log_path, config): scaling_column = df[config["y_axis"]["scaling"]["column"]["name"]].copy() # get mask of scaling series if config["y_axis"]["scaling"]["column"].get("series") is not None: - scaling_series_mask = self.row_filter(series_filters[config["y_axis"]["scaling"]["column"]["series"]], df) + scaling_series_mask = self.row_filter( + series_filters[config["y_axis"]["scaling"]["column"]["series"]], df) # get mask of scaling x-value if config["y_axis"]["scaling"]["column"].get("x_value"): - scaling_x_value_mask = df[config["x_axis"]["value"]] == config["y_axis"]["scaling"]["column"]["x_value"] + scaling_x_value_mask = ( + df[config["x_axis"]["value"]] == config["y_axis"]["scaling"]["column"]["x_value"]) # check custom value is not zero elif not config["y_axis"]["scaling"].get("custom"): - raise RuntimeError("Invalid custom scaling value (cannot divide by {0}).".format(config["y_axis"]["scaling"].get("custom"))) + raise RuntimeError("Invalid custom scaling value (cannot divide by {0})." 
\ + .format(config["y_axis"]["scaling"].get("custom"))) # apply data transformation per series if series_filters: for f in series_filters: m = self.row_filter(f, df) - df[mask & m] = self.transform_axis(df[mask & m], mask & m, config["y_axis"], scaling_column, scaling_series_mask, scaling_x_value_mask) + df[mask & m] = self.transform_axis( + df[mask & m], mask & m, config["y_axis"], scaling_column, + scaling_series_mask, scaling_x_value_mask) # apply data transformation to all data else: - df[mask] = self.transform_axis(df[mask], mask, config["y_axis"], scaling_column, scaling_series_mask, scaling_x_value_mask) + df[mask] = self.transform_axis( + df[mask], mask, config["y_axis"], scaling_column, + scaling_series_mask, scaling_x_value_mask) print("Selected dataframe:") print(df[columns][mask]) # call a plotting script - self.plot_generic(config["title"], df[columns][mask], config["x_axis"], config["y_axis"], series_filters) + self.plot_generic( + config["title"], df[columns][mask], config["x_axis"], config["y_axis"], series_filters) if self.debug & self.verbose: print("") @@ -252,7 +268,8 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): # combine group names for later plotting with groupby index_group_col = "_".join(groups) # group by group names (or just x-axis if no other groups are present) - grouped_df = df.groupby(x_column, sort=False) if len(groups) == 1 else df.groupby(groups, sort=False) + grouped_df = df.groupby(x_column, sort=False) if len(groups) == 1 \ + else df.groupby(groups, sort=False) if self.debug: print("") @@ -267,13 +284,17 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): else math.ceil(np.nanmax(df[y_column])*1.2) # create html file to store plot in - output_file(filename=os.path.join(Path(__file__).parent, "{0}.html".format(title.replace(" ", "_"))), title=title) + output_file(filename=os.path.join( + Path(__file__).parent, "{0}.html".format(title.replace(" ", "_"))), title=title) # create plot - plot = figure(x_range=grouped_df, y_range=(min_y, max_y), title=title, width=800, toolbar_location="above") + plot = figure(x_range=grouped_df, y_range=(min_y, max_y), title=title, + width=800, toolbar_location="above") # configure tooltip - plot.add_tools(HoverTool(tooltips=[(y_label, "@{0}_mean".format(y_column) - + ("{%0.2f}" if pd.api.types.is_float_dtype(df[y_column].dtype) else ""))], + plot.add_tools(HoverTool(tooltips= + [(y_label, "@{0}_mean".format(y_column) + + ("{%0.2f}" if pd.api.types.is_float_dtype(df[y_column].dtype) + else ""))], formatters={"@{0}_mean".format(y_column) : "printf"})) # sort x-axis values in descending order (otherwise default sort is ascending) @@ -289,7 +310,8 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): colour_factors = [str(x) for x in sorted(pd.Series(df[groups[-1]].unique(), dtype=last_group_dtype))] # divide and assign colours - index_cmap = factor_cmap(index_group_col, palette=viridis(len(colour_factors)), factors=colour_factors, start=len(groups)-1, end=len(groups)) + index_cmap = factor_cmap(index_group_col, palette=viridis(len(colour_factors)), + factors=colour_factors, start=len(groups)-1, end=len(groups)) # add legend labels to data source data_source = ColumnDataSource(grouped_df).data legend_labels = ["{0} = {1}".format(groups[-1].replace("_", " "), @@ -300,7 +322,8 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): # create legend outside plot plot.add_layout(Legend(), "right") # add bars 
- plot.vbar(x=index_group_col, top="{0}_mean".format(y_column), width=0.9, source=data_source, line_color="white", fill_color=index_cmap, legend_group="legend_labels", hover_alpha=0.9) + plot.vbar(x=index_group_col, top="{0}_mean".format(y_column), width=0.9, source=data_source, + line_color="white", fill_color=index_cmap, legend_group="legend_labels", hover_alpha=0.9) # add labels plot.xaxis.axis_label = x_label plot.yaxis.axis_label = y_label @@ -333,7 +356,8 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): def row_filter(self, filter, df: pd.DataFrame): """ - Return a dataframe mask based on a filter condition. The filter is a list that contains a column name, an operator, and a value (e.g. ["flops_value", ">=", 1.0]). + Return a dataframe mask based on a filter condition. The filter is a list that + contains a column name, an operator, and a value (e.g. ["flops_value", ">=", 1.0]). Args: filter: list, a condition based on which a dataframe is filtered. @@ -368,7 +392,8 @@ def row_filter(self, filter, df: pd.DataFrame): return mask - def transform_axis(self, df: pd.DataFrame, df_mask, axis, scaling_column, scaling_series_mask, scaling_x_value_mask): + def transform_axis(self, df: pd.DataFrame, df_mask, axis, scaling_column, + scaling_series_mask, scaling_x_value_mask): """ Divide axis values by specified values and reflect this change in the dataframe. @@ -427,18 +452,25 @@ def read_args(): Return parsed command line arguments. """ - parser = argparse.ArgumentParser(description="Plot benchmark data. At least one perflog must be supplied.") + parser = argparse.ArgumentParser(description="Plot benchmark data. \ + At least one perflog must be supplied.") # required positional arguments (log path, config path) - parser.add_argument("log_path", type=str, help="path to a perflog file or a directory containing perflog files") - parser.add_argument("config_path", type=str, help="path to a configuration file specifying what to plot") + parser.add_argument("log_path", type=str, + help="path to a perflog file or a directory containing perflog files") + parser.add_argument("config_path", type=str, + help="path to a configuration file specifying what to plot") # optional argument (plot type) - parser.add_argument("-p", "--plot_type", type=str, default="generic", help="type of plot to be generated (default: 'generic')") + parser.add_argument("-p", "--plot_type", type=str, default="generic", + help="type of plot to be generated (default: 'generic')") # info dump flags - parser.add_argument("-d", "--debug", action="store_true", help="debug flag for printing additional information") - parser.add_argument("-v", "--verbose", action="store_true", help="verbose flag for printing more debug information (must be used in conjunction with the debug flag)") + parser.add_argument("-d", "--debug", action="store_true", + help="debug flag for printing additional information") + parser.add_argument("-v", "--verbose", action="store_true", + help="verbose flag for printing more debug information \ + (must be used in conjunction with the debug flag)") return parser.parse_args() @@ -483,9 +515,11 @@ def read_config(path): # check series length if config.get("series") is None: - raise KeyError("Missing series information (specify an empty list [] if there is only one series).") + raise KeyError( + "Missing series information (specify an empty list [] if there is only one series).") if len(config["series"]) == 1: - raise KeyError("Number of series must be >= 2 (specify an empty list [] if 
there is only one series).") + raise KeyError( + "Number of series must be >= 2 (specify an empty list [] if there is only one series).") # check filters are present if not config.get("filters"): @@ -509,7 +543,8 @@ def read_perflog(path): Args: path: str, path to log file. - NB: This currently depends on having a non-default handlers_perflog.filelog.format in reframe's configuration. See code. + NB: This currently depends on having a non-default handlers_perflog.filelog.format + in reframe's configuration. See code. The returned dataframe will have columns for all fields in a performance log record except display name, extra resources, and env vars. Display name will be broken up @@ -522,7 +557,8 @@ def read_perflog(path): REQUIRED_LOG_FIELDS = ["job_completion_time", r"\w+_value$", r"\w+_unit$", "display_name"] # look for required column matches - required_field_matches = [len(list(filter(re.compile(rexpr).match, df.columns))) > 0 for rexpr in REQUIRED_LOG_FIELDS] + required_field_matches = [len(list(filter(re.compile(rexpr).match, df.columns))) > 0 + for rexpr in REQUIRED_LOG_FIELDS] # check all required columns are present if False in required_field_matches: raise KeyError("Perflog missing one or more required fields", REQUIRED_LOG_FIELDS) @@ -549,10 +585,13 @@ def get_display_name_info(display_name): """ - Return a tuple containing the test name and a dictionary of parameter names and their values from the given input string. The parameter dictionary may be empty if no parameters are present. + Return a tuple containing the test name and a dictionary of parameter names + and their values from the given input string. The parameter dictionary may be empty + if no parameters are present. Args: - display_name: str, expecting a format of <test_name> followed by zero or more %<param>=<value> pairs. + display_name: str, expecting a format of <test_name> followed by zero or more + %<param>=<value> pairs. """ split_display_name = display_name.split(" %") @@ -578,7 +617,8 @@ def insert_key_cols(df: pd.DataFrame, index, results): def get_axis_info(df: pd.DataFrame, axis, series_filters): """ - Return the column name and label for a given axis. If a column name is supplied as units information, the actual units will be extracted from a dataframe. + Return the column name and label for a given axis. If a column name is supplied as + units information, the actual units will be extracted from a dataframe. Args: df: dataframe, data to plot. @@ -592,8 +632,8 @@ def get_axis_info(df: pd.DataFrame, axis, series_filters): if axis.get("units").get("column"): unit_set = set(df[axis["units"]["column"]].dropna()) # check all rows have the same units -#if len(unit_set) != 1: -# raise RuntimeError("Unexpected number of axis unit entries {0}".format(unit_set)) + if len(unit_set) != 1: + raise RuntimeError("Unexpected number of axis unit entries {0}".format(unit_set)) units = next(iter(unit_set)) # get scaling information @@ -612,7 +652,8 @@ # determine axis label label = "{0}{1}{2}".format(titlecase(col_name.replace("_", " ")), - titlecase(" Scaled by {0}".format(scaling.replace("_", " "))) if scaling else "", + titlecase(" Scaled by {0}".format(scaling.replace("_", " "))) + if scaling else "", " ({0})".format(units) if units else "") return col_name, label From 28deebde93815c69a0b61fc198a21e1e86c7ed0d Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Fri, 15 Dec 2023 17:36:13 +0000 Subject: [PATCH 32/35] Fixed grouped (x, series) sorting for non-string data.
--- post-processing/post_processing.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index 85fd2cc1..739d50e6 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -302,9 +302,23 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): if x_axis.get("sort"): if x_axis["sort"] == "descending": reverse = True - plot.x_range.factors = sorted(plot.x_range.factors, reverse=reverse, - key=lambda x: pd.Series(x[0] if len(groups) > 1 else x, - dtype=x_col_dtype).iloc[0]) + + if len(groups) > 1: + # sort by x-axis values first + plot.x_range.factors = sorted(plot.x_range.factors, reverse=reverse, + key=lambda x: pd.Series(x[0], dtype=x_col_dtype).iloc[0]) + # get series values with their original dtype + # NOTE: currently not accounting for more than one series column + series_values = [pd.Series(x[-1], dtype=last_group_dtype).iloc[0] + for x in plot.x_range.factors] + # sort x-axis groups by series + sorted_x_items = [x[1] for x in sorted(zip(series_values, plot.x_range.factors), + reverse=reverse, key=lambda x: x[0])] + plot.x_range.factors = sorted_x_items + else: + # sort only by x-axis values + plot.x_range.factors = sorted(plot.x_range.factors, reverse=reverse, + key=lambda x: pd.Series(x, dtype=x_col_dtype).iloc[0]) # automatically base bar colouring on last group column colour_factors = [str(x) for x in sorted(pd.Series(df[groups[-1]].unique(), @@ -452,8 +466,8 @@ def read_args(): Return parsed command line arguments. """ - parser = argparse.ArgumentParser(description="Plot benchmark data. \ - At least one perflog must be supplied.") + parser = argparse.ArgumentParser( + description="Plot benchmark data. At least one perflog must be supplied.") # required positional arguments (log path, config path) parser.add_argument("log_path", type=str, From 7e24217a1b47aa0ecc47cb8fd223443b662c503e Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Fri, 15 Dec 2023 18:22:33 +0000 Subject: [PATCH 33/35] Adjusted grouped (x, series) sorting to ensure series sorting is secondary to x-value sorting. 
--- post-processing/post_processing.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/post-processing/post_processing.py b/post-processing/post_processing.py index 739d50e6..2379b4e4 100644 --- a/post-processing/post_processing.py +++ b/post-processing/post_processing.py @@ -303,22 +303,20 @@ def plot_generic(self, title, df: pd.DataFrame, x_axis, y_axis, series_filters): if x_axis["sort"] == "descending": reverse = True + # sort x-axis groups by series first if len(groups) > 1: - # sort by x-axis values first - plot.x_range.factors = sorted(plot.x_range.factors, reverse=reverse, - key=lambda x: pd.Series(x[0], dtype=x_col_dtype).iloc[0]) # get series values with their original dtype # NOTE: currently not accounting for more than one series column series_values = [pd.Series(x[-1], dtype=last_group_dtype).iloc[0] for x in plot.x_range.factors] - # sort x-axis groups by series sorted_x_items = [x[1] for x in sorted(zip(series_values, plot.x_range.factors), reverse=reverse, key=lambda x: x[0])] plot.x_range.factors = sorted_x_items - else: - # sort only by x-axis values - plot.x_range.factors = sorted(plot.x_range.factors, reverse=reverse, - key=lambda x: pd.Series(x, dtype=x_col_dtype).iloc[0]) + + # sort by x-axis values + plot.x_range.factors = sorted(plot.x_range.factors, reverse=reverse, + key=lambda x: pd.Series(x[0] if len(groups) > 1 else x, + dtype=x_col_dtype).iloc[0]) # automatically base bar colouring on last group column colour_factors = [str(x) for x in sorted(pd.Series(df[groups[-1]].unique(), From 7119ce7a5168013207cf14a7936417de3079ab68 Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Mon, 18 Dec 2023 18:41:25 +0000 Subject: [PATCH 34/35] Added some README clarifications + a config template. --- post-processing/README.md | 112 ++++++++++++++++++++++++++++---------- 1 file changed, 82 insertions(+), 30 deletions(-) diff --git a/post-processing/README.md b/post-processing/README.md index 3a9cee50..b72fb0e5 100644 --- a/post-processing/README.md +++ b/post-processing/README.md @@ -36,7 +36,7 @@ Run `post_processing.py -h` for more information (including debugging flags). ### Configuration Structure -Before running post-processing, create a config file including all necessary information for graph generation (you must specify at least plot title, x-axis, y-axis, and column types). See below for an example and some clarifying notes. +Before running post-processing, create a config file including all necessary information for graph generation (you must specify at least plot title, x-axis, y-axis, and column types). See below for a template, an example, and some clarifying notes. - `title` - Plot title. - `x_axis`, `y_axis` - Axis information. @@ -54,8 +54,58 @@ Before running post-processing, create a config file including all necessary inf - `column_types` - Pandas dtype for each relevant column (axes, units, filters, series). Specified with a dictionary. - `Accepted types: "str"/"string"/"object", "int"/"int64", "float"/"float64", "datetime"/"datetime64"` +### Complete Config Template + +This template includes all possible config fields, some of which are optional or mutually exclusive (e.g. `column` and `custom`). 
+ +```yaml +title: + +x_axis: + value: + # use one of 'column' or 'custom' + units: + column: + custom: + # optional (default: ascending) + sort: "descending" + +y_axis: + value: + # use one of 'column' or 'custom' + units: + column: + custom: + # optional (default: no data transformation) + # use one of 'column' or 'custom' + scaling: + column: + name: + series: + x_value: + custom: + +# optional (default: include all data) +# entry format: [<column_name>, <operator>, <value>] +# accepted operators: ==, !=, <, >, <=, >= +filters: + and: + or: + +# optional (default: no x-axis grouping, one plot per graph) +# entry format: [<column_name>, <value>] +series: + +# include types for each column that is used in the config +# accepted types: string/object, int, float, datetime +column_types: + <column_name>: <column_type> +``` + ### Example Config +This example more accurately illustrates what an actual config file may look like. + ```yaml title: "Plot Title" @@ -91,7 +141,7 @@ column_types: series_col: "str" ``` -#### A Note on X-axis Grouping +#### X-axis Grouping The settings above will produce a graph that will have its x-axis data grouped based on the values in `x_axis_col` and `series_col`. (`Note: only groupings with one series column are currently supported.`) If we imagine that `x_axis_col` has two unique values, `"x_val_1"` and `"x_val_2"`, there will be four groups (and four bars) along the x-axis: @@ -100,7 +150,7 @@ The settings above will produce a graph that will have its x-axis data grouped b - (`x_val_2`, `series_val_1`) - (`x_val_2`, `series_val_2`) -#### A Note on Scaling +#### Scaling When axis values are scaled, they are all divided by a number or a list of numbers. If using more than one number for scaling, the length of the list must match the length of the axis column being scaled. (`Note: scaling is currently only supported for y-axis data, as graphs with a non-categorical x-axis are still a work in progress.`) @@ -119,12 +169,12 @@ y_axis: In the snippet above, all y-axis values are to be divided by 2. -|y_axis_col|scaled_y_axis_col| -|-|-| -|3.2|3.2 / 2.0 = 1.6| -|5.4|5.4 / 2.0 = 2.7| -|2.4|2.4 / 2.0 = 1.2| -|5.0|5.0 / 2.0 = 2.5| +|y_axis_col||scaled_y_axis_col| +|-|-|-| +|3.2|3.2 / 2.0 =|1.6| +|5.4|5.4 / 2.0 =|2.7| +|2.4|2.4 / 2.0 =|1.2| +|5.0|5.0 / 2.0 =|2.5| **Column Scaling** @@ -142,12 +192,12 @@ y_axis: In the snippet above, all y-axis values are to be divided by the corresponding values in the scaling column. -|y_axis_col|scaling_col|scaled_y_axis_col| -|-|-|-| -|3.2|**`1.6`**|3.2 / 1.6 = 2.0| -|5.4|**`2.0`**|5.4 / 2.0 = 2.7| -|2.4|**`0.6`**|2.4 / 0.6 = 4.0| -|5.0|**`2.5`**|5.0 / 2.5 = 2.0| +|y_axis_col|scaling_col||scaled_y_axis_col| +|-|-|-|-| +|3.2|**`1.6`**|3.2 / 1.6 =|2.0| +|5.4|**`2.0`**|5.4 / 2.0 =|2.7| +|2.4|**`0.6`**|2.4 / 0.6 =|4.0| +|5.0|**`2.5`**|5.0 / 2.5 =|2.0| **Series Scaling** @@ -168,12 +218,12 @@ y_axis: In the snippet above, all y-axis values are to be split by series and divided by the corresponding values in the scaling series.
-|y_axis_col|scaling_col|series_col|scaled_y_axis_col| -|-|-|-|-| -|3.2|**`1.6`**|`series_val_1`|3.2 / 1.6 = 2.0| -|5.4|**`2.0`**|`series_val_1`|5.4 / 2.0 = 2.7| -|2.4|0.6|series_val_2|2.4 / 1.6 = 1.5| -|5.0|2.5|series_val_2|5.0 / 2.0 = 2.5| +|y_axis_col|scaling_col|series_col||scaled_y_axis_col| +|-|-|-|-|-| +|3.2|**`1.6`**|`series_val_1`|3.2 / 1.6 =|2.0| +|5.4|**`2.0`**|`series_val_1`|5.4 / 2.0 =|2.7| +|2.4|0.6|series_val_2|2.4 / 1.6 =|1.5| +|5.0|2.5|series_val_2|5.0 / 2.0 =|2.5| **Selected Value Scaling** @@ -193,25 +243,27 @@ y_axis: In the snippet above, all y-axis values are to be divided by the scaling value found by filtering the scaling column by both series and x-axis value. -|x_axis_col|y_axis_col|scaling_col|series_col|scaled_y_axis_col| -|-|-|-|-|-| -|x_val_1|3.2|1.6|series_val_1|3.2 / 2.0 = 1.6| -|`x_val_s`|5.4|**`2.0`**|`series_val_1`|5.4 / 2.0 = 2.7| -|x_val_2|2.4|0.7|series_val_2|2.4 / 2.0 = 1.2| -|x_val_s|5.0|2.5|series_val_2|5.0 / 2.0 = 2.5| +|x_axis_col|y_axis_col|scaling_col|series_col||scaled_y_axis_col| +|-|-|-|-|-|-| +|x_val_1|3.2|1.6|series_val_1|3.2 / 2.0 =|1.6| +|`x_val_s`|5.4|**`2.0`**|`series_val_1`|5.4 / 2.0 =|2.7| +|x_val_2|2.4|0.7|series_val_2|2.4 / 2.0 =|1.2| +|x_val_s|5.0|2.5|series_val_2|5.0 / 2.0 =|2.5| (`Note: if series are not present and x-axis values are all unique, it is enough to specify just the column name and x-value.`) -#### A Note on Filters +#### Filters -AND filters and OR filters are combined with a logical AND to produce the final filter mask applied to the DataFrame prior to graphing. For example: +A condition list for filtering has entries in the format `[<column_name>, <operator>, <value>]`. AND filters and OR filters are combined with a logical AND to produce the final filter mask applied to the DataFrame prior to graphing. For example: - `and_filters` = `cond1`, `cond2` - `or_filters` = `cond3`, `cond4` The filters above would produce the final filter `mask` = (`cond1` AND `cond2`) AND (`cond3` OR `cond4`). -#### A Note on Column Types +#### Column Types + +Types must be specified for all columns included in the config in the format `<column_name>: <column_type>`. Accepted types include `string/object`, `int`, `float`, and `datetime`. All user-specified types are internally converted to their nullable incarnations. As such: From 49884d83a6e19cb52374c4ff0dd0bb0616ceb0bc Mon Sep 17 00:00:00 2001 From: pineapple-cat Date: Tue, 19 Dec 2023 16:26:15 +0000 Subject: [PATCH 35/35] Rehomed note on replaced reframe columns. --- post-processing/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/post-processing/README.md b/post-processing/README.md index b72fb0e5..4d3ed4a4 100644 --- a/post-processing/README.md +++ b/post-processing/README.md @@ -54,6 +54,12 @@ Before running post-processing, create a config file including all necessary inf - `column_types` - Pandas dtype for each relevant column (axes, units, filters, series). Specified with a dictionary. - `Accepted types: "str"/"string"/"object", "int"/"int64", "float"/"float64", "datetime"/"datetime64"` +#### A Note on Replaced ReFrame Columns + +A perflog contains certain columns that will not be present in the DataFrame available to the graphing script. Currently, these columns are `display_name`, `extra_resources`, and `env_vars`. Removed columns should not be referenced in a plot config file. + +When the row contents of `display_name` are parsed, they are separated into their constituent benchmark names and parameters.
This column is replaced with a new `test_name` column and new parameter columns (if present). Similarly, the `extra_resources` and `env_vars` columns are replaced with their respective dictionary row contents (keys become columns, values become row contents). + ### Complete Config Template This template includes all possible config fields, some of which are optional or mutually exclusive (e.g. `column` and `custom`). @@ -272,12 +278,6 @@ All user-specified types are internally converted to their nullable incarnations - Integers are treated as `Int64`. - Datetimes are treated as `datetime64[ns]`. -#### A Note on Replaced ReFrame Columns - -A perflog contains certain columns that will not be present in the DataFrame available to the graphing script. Currently, these columns are `display_name`, `extra_resources`, and `env_vars`. Removed columns should not be referenced in a plot config file. - -When the row contents of `display_name` are parsed, they are separated into their constituent benchmark names and parameters. This column is replaced with a new `test_name` column and new parameter columns (if present). Similarly, the `extra_resources` and `env_vars` columns are replaced with their respective dictionary row contents (keys become columns, values become row contents). - ### Future Development The post-processing capabilities are still a work in progress. Some upcoming developments:
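As a closing illustration of the `display_name` parsing that the rehomed note describes (a sketch inferred from the docstring and the `split(" %")` call shown in PATCH 31, not the verbatim `get_display_name_info` implementation):

```python
# Sketch: split "<test_name> %<param>=<value> ..." into a test name
# and a parameter dictionary, as described in the README note.
def parse_display_name(display_name: str):
    parts = display_name.split(" %")
    test_name = parts[0]
    params = dict(p.split("=", 1) for p in parts[1:])
    return test_name, params

print(parse_display_name("SombreroBenchmark %tasks=2 %cpus_per_task=1"))
# ('SombreroBenchmark', {'tasks': '2', 'cpus_per_task': '1'})
```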