From 26cf20e8dbbd151c21d149c3ab5f0630e4341e9f Mon Sep 17 00:00:00 2001
From: James Bartlett <jamesbartlett@pixielabs.ai>
Date: Mon, 14 Aug 2023 10:51:29 -0700
Subject: [PATCH] [perf_tool] Add templates for datastudio charts and queries.
 (#1671)

Summary: Adds the templates used to create the datastudio visualizations
for the perf_tool.

Type of change: /kind cleanup

Test Plan: N/A (no effect on relevant code, just checked in to make it
easier to change the datastudio visualizations in the future).

Signed-off-by: James Bartlett <jamesbartlett@pixielabs.ai>
---
 src/e2e_test/perf_tool/cmd/BUILD.bazel        |   2 +
 src/e2e_test/perf_tool/cmd/datastudio.go      |  61 ++++
 src/e2e_test/perf_tool/datastudio/BUILD.bazel |  36 +++
 .../perf_tool/datastudio/datastudio.go        | 296 ++++++++++++++++++
 .../templates/charts/experiment/bytes.json    | 204 ++++++++++++
 .../templates/charts/experiment/percent.json  | 199 ++++++++++++
 .../templates/charts/suite/bytes.json         | 133 ++++++++
 .../templates/charts/suite/percent.json       | 132 ++++++++
 .../all_suites_workloads_parameters.sql       |  62 ++++
 .../queries/cpu_usage_preprocessing.sql       |  19 ++
 .../templates/queries/experiment_view.sql     | 173 ++++++++++
 .../templates/queries/suite_view.sql          | 251 +++++++++++++++
 .../queries/suite_view_app_overhead.sql       | 269 ++++++++++++++++
 13 files changed, 1837 insertions(+)
 create mode 100644 src/e2e_test/perf_tool/cmd/datastudio.go
 create mode 100644 src/e2e_test/perf_tool/datastudio/BUILD.bazel
 create mode 100644 src/e2e_test/perf_tool/datastudio/datastudio.go
 create mode 100644 src/e2e_test/perf_tool/datastudio/templates/charts/experiment/bytes.json
 create mode 100644 src/e2e_test/perf_tool/datastudio/templates/charts/experiment/percent.json
 create mode 100644 src/e2e_test/perf_tool/datastudio/templates/charts/suite/bytes.json
 create mode 100644 src/e2e_test/perf_tool/datastudio/templates/charts/suite/percent.json
 create mode 100644 src/e2e_test/perf_tool/datastudio/templates/queries/all_suites_workloads_parameters.sql
 create mode 100644 src/e2e_test/perf_tool/datastudio/templates/queries/cpu_usage_preprocessing.sql
 create mode 100644 src/e2e_test/perf_tool/datastudio/templates/queries/experiment_view.sql
 create mode 100644 src/e2e_test/perf_tool/datastudio/templates/queries/suite_view.sql
 create mode 100644 src/e2e_test/perf_tool/datastudio/templates/queries/suite_view_app_overhead.sql

diff --git a/src/e2e_test/perf_tool/cmd/BUILD.bazel b/src/e2e_test/perf_tool/cmd/BUILD.bazel
index 2fe1bb9ff19..012fd3488b0 100644
--- a/src/e2e_test/perf_tool/cmd/BUILD.bazel
+++ b/src/e2e_test/perf_tool/cmd/BUILD.bazel
@@ -19,6 +19,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
 go_library(
     name = "cmd",
     srcs = [
+        "datastudio.go",
         "github_matrix.go",
         "root.go",
         "run.go",
@@ -27,6 +28,7 @@ go_library(
     importpath = "px.dev/pixie/src/e2e_test/perf_tool/cmd",
     visibility = ["//visibility:public"],
     deps = [
+        "//src/e2e_test/perf_tool/datastudio",
         "//src/e2e_test/perf_tool/experimentpb:experiment_pl_go_proto",
         "//src/e2e_test/perf_tool/pkg/cluster",
         "//src/e2e_test/perf_tool/pkg/cluster/gke",
diff --git a/src/e2e_test/perf_tool/cmd/datastudio.go b/src/e2e_test/perf_tool/cmd/datastudio.go
new file mode 100644
index 00000000000..b622824d271
--- /dev/null
+++ b/src/e2e_test/perf_tool/cmd/datastudio.go
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2018- The Pixie Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package cmd
+
+import (
+	log "github.com/sirupsen/logrus"
+	"github.com/spf13/cobra"
+	"github.com/spf13/viper"
+
+	"px.dev/pixie/src/e2e_test/perf_tool/datastudio"
+)
+
+// GenerateDatastudioCmd generates the queries and charts to use in datastudio.
+var GenerateDatastudioCmd = &cobra.Command{
+	Use:   "generate_datastudio",
+	Short: "Generate the sql queries and vega charts to use in datastudio.",
+	PreRun: func(cmd *cobra.Command, args []string) {
+		viper.BindPFlags(cmd.Flags())
+	},
+	Run: func(cmd *cobra.Command, args []string) {
+		generateDatastudioCmd(cmd)
+	},
+}
+
+func init() {
+	GenerateDatastudioCmd.Flags().StringP("output", "o", "", "Path to put generated queries and charts")
+	GenerateDatastudioCmd.Flags().String("bq_project", "", "Gcloud project containing bq dataset to query from.")
+	GenerateDatastudioCmd.Flags().String("bq_dataset", "", "BQ dataset to query from.")
+	GenerateDatastudioCmd.Flags().String("ds_report_id", "", "The unique ID of the datastudio report, used for self-links in the datastudio charts")
+	GenerateDatastudioCmd.Flags().String("ds_experiment_page_id", "", "The unique ID of the datastudio experiment page, used for self-links in the datastudio charts")
+
+	RootCmd.AddCommand(GenerateDatastudioCmd)
+}
+
+func generateDatastudioCmd(*cobra.Command) {
+	outPath := viper.GetString("output")
+	project := viper.GetString("bq_project")
+	datasetName := viper.GetString("bq_dataset")
+	reportID := viper.GetString("ds_report_id")
+	experimentPageID := viper.GetString("ds_experiment_page_id")
+
+	if err := datastudio.GenerateViews(outPath, project, datasetName, reportID, experimentPageID); err != nil {
+		log.WithError(err).Fatal("failed to generate views")
+	}
+}
diff --git a/src/e2e_test/perf_tool/datastudio/BUILD.bazel b/src/e2e_test/perf_tool/datastudio/BUILD.bazel
new file mode 100644
index 00000000000..01ee5f5005b
--- /dev/null
+++ b/src/e2e_test/perf_tool/datastudio/BUILD.bazel
@@ -0,0 +1,36 @@
+# Copyright 2018- The Pixie Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "datastudio",
+    srcs = ["datastudio.go"],
+    embedsrcs = [
+        "templates/charts/experiment/bytes.json",
+        "templates/charts/experiment/percent.json",
+        "templates/charts/suite/bytes.json",
+        "templates/charts/suite/percent.json",
+        "templates/queries/all_suites_workloads_parameters.sql",
+        "templates/queries/experiment_view.sql",
+        "templates/queries/suite_view.sql",
+        "templates/queries/cpu_usage_preprocessing.sql",
+        "templates/queries/suite_view_app_overhead.sql",
+    ],
+    importpath = "px.dev/pixie/src/e2e_test/perf_tool/datastudio",
+    visibility = ["//visibility:public"],
+    deps = ["@com_github_masterminds_sprig_v3//:sprig"],
+)
diff --git a/src/e2e_test/perf_tool/datastudio/datastudio.go b/src/e2e_test/perf_tool/datastudio/datastudio.go
new file mode 100644
index 00000000000..9d024c901d3
--- /dev/null
+++ b/src/e2e_test/perf_tool/datastudio/datastudio.go
@@ -0,0 +1,296 @@
+/*
+ * Copyright 2018- The Pixie Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package datastudio
+
+import (
+	// embed is used to load chart/query templates.
+	_ "embed"
+	"fmt"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
+	"text/template"
+
+	"github.com/Masterminds/sprig/v3"
+)
+
+// GenerateViews generates all of the datastudio charts/queries from templates.
+func GenerateViews(outPath string, project string, dataset string, reportID string, expPageID string) error {
+	queryOutPath := path.Join(outPath, "queries")
+	if err := os.MkdirAll(queryOutPath, 0775); err != nil {
+		return err
+	}
+	chartOutPath := path.Join(outPath, "charts")
+	if err := os.MkdirAll(chartOutPath, 0775); err != nil {
+		return err
+	}
+
+	for _, vals := range suiteViews {
+		vals.Project = project
+		vals.Dataset = dataset
+		vals.DSReportID = reportID
+		vals.DSExperimentPageID = expPageID
+		path := filepath.Join(queryOutPath, fmt.Sprintf("suite_%s.sql", vals.MetricName))
+		err := executeTemplate(vals, suiteTemplate, path)
+		if err != nil {
+			return err
+		}
+
+		path = filepath.Join(chartOutPath, fmt.Sprintf("suite_%s.json", vals.MetricName))
+		templ := suiteChartTemplates[vals.Unit]
+		if err := executeTemplate(vals, templ, path); err != nil {
+			return err
+		}
+	}
+
+	for _, vals := range appOverheadSuiteViews {
+		vals.Project = project
+		vals.Dataset = dataset
+		vals.DSReportID = reportID
+		vals.DSExperimentPageID = expPageID
+		path := filepath.Join(queryOutPath, fmt.Sprintf("suite_app_overhead_%s.sql", vals.MetricName))
+		err := executeTemplate(vals, suiteAppOverheadTemplate, path)
+		if err != nil {
+			return err
+		}
+
+		path = filepath.Join(chartOutPath, fmt.Sprintf("suite_app_overhead_%s.json", vals.MetricName))
+		templ := suiteChartTemplates[vals.Unit]
+		if err := executeTemplate(vals, templ, path); err != nil {
+			return err
+		}
+	}
+
+	for _, vals := range experimentViews {
+		vals.Project = project
+		vals.Dataset = dataset
+		name := strings.Join(vals.MetricNames, "_")
+		path := filepath.Join(queryOutPath, fmt.Sprintf("experiment_%s.sql", name))
+		if err := executeTemplate(vals, experimentTemplate, path); err != nil {
+			return err
+		}
+
+		path = filepath.Join(chartOutPath, fmt.Sprintf("experiment_%s.json", name))
+		templ := expChartTemplates[vals.Unit]
+		if err := executeTemplate(vals, templ, path); err != nil {
+			return err
+		}
+	}
+
+	path := filepath.Join(queryOutPath, "all_suites_workloads_parameters.sql")
+	vals := &struct {
+		Project string
+		Dataset string
+	}{
+		Project: project,
+		Dataset: dataset,
+	}
+	if err := executeTemplate(vals, allSuitesWorkloadsParametersTemplate, path); err != nil {
+		return err
+	}
+	return nil
+}
+
+//go:embed templates/queries/all_suites_workloads_parameters.sql
+var allSuitesWorkloadsParametersTemplate string
+
+//go:embed templates/queries/suite_view.sql
+var suiteTemplate string
+
+//go:embed templates/queries/suite_view_app_overhead.sql
+var suiteAppOverheadTemplate string
+
+//go:embed templates/queries/experiment_view.sql
+var experimentTemplate string
+
+//go:embed templates/charts/suite/bytes.json
+var suiteChartByteTemplate string
+
+//go:embed templates/charts/suite/percent.json
+var suiteChartPercentTemplate string
+
+//go:embed templates/charts/experiment/percent.json
+var expChartPercentTemplate string
+
+//go:embed templates/charts/experiment/bytes.json
+var expChartByteTemplate string
+
+//go:embed templates/queries/cpu_usage_preprocessing.sql
+var cpuUsagePreprocessing string
+
+var suiteChartTemplates = map[metricUnit]string{
+	bytes:   suiteChartByteTemplate,
+	percent: suiteChartPercentTemplate,
+}
+
+var expChartTemplates = map[metricUnit]string{
+	bytes:   expChartByteTemplate,
+	percent: expChartPercentTemplate,
+}
+
+type metricUnit string
+
+const (
+	bytes   metricUnit = "bytes"
+	percent metricUnit = "percent"
+)
+
+type suiteViewTemplateVals struct {
+	ChartTitle          string
+	MetricName          string
+	MetricsUsed         []string
+	MetricSelectExpr    string
+	TimeAgg             string
+	Unit                metricUnit
+	CustomPreprocessing string
+
+	DSReportID         string
+	DSExperimentPageID string
+
+	Project string
+	Dataset string
+}
+
+var suiteViews = []*suiteViewTemplateVals{
+	{
+		ChartTitle:          "CPU Usage",
+		MetricName:          "cpu_usage",
+		MetricsUsed:         []string{"cpu_usage", "cpu_seconds_counter"},
+		MetricSelectExpr:    "r.cpu_usage",
+		TimeAgg:             "",
+		Unit:                percent,
+		CustomPreprocessing: cpuUsagePreprocessing,
+	},
+	{
+		ChartTitle:       "Max Heap Usage (ignoring table store)",
+		MetricName:       "max_memory_ex_table",
+		MetricsUsed:      []string{"heap_size_bytes", "table_size"},
+		MetricSelectExpr: "r.heap_size_bytes - r.table_size",
+		TimeAgg:          "max(max_memory_ex_table)",
+		Unit:             bytes,
+	},
+	{
+		ChartTitle:       "Max Heap Usage",
+		MetricName:       "max_heap_size",
+		MetricsUsed:      []string{"heap_size_bytes"},
+		MetricSelectExpr: "r.heap_size_bytes",
+		TimeAgg:          "max(max_heap_size)",
+		Unit:             bytes,
+	},
+	{
+		ChartTitle:       "Max RSS Memory Usage",
+		MetricName:       "max_rss",
+		MetricsUsed:      []string{"rss"},
+		MetricSelectExpr: "r.rss",
+		TimeAgg:          "max(max_rss)",
+		Unit:             bytes,
+	},
+	{
+		ChartTitle:       "HTTP Data Loss",
+		MetricName:       "http_data_loss",
+		MetricsUsed:      []string{"http_data_loss"},
+		MetricSelectExpr: "r.http_data_loss",
+		TimeAgg:          "array_agg(http_data_loss ORDER BY ts DESC LIMIT 1)[OFFSET(0)]",
+		Unit:             percent,
+	},
+}
+
+var appOverheadSuiteViews = []*suiteViewTemplateVals{
+	{
+		ChartTitle:          "Application CPU Overhead (% increase over baseline)",
+		MetricName:          "cpu_usage",
+		MetricsUsed:         []string{"cpu_usage", "cpu_seconds_counter"},
+		MetricSelectExpr:    "r.cpu_usage",
+		TimeAgg:             "avg(cpu_usage)",
+		Unit:                percent,
+		CustomPreprocessing: cpuUsagePreprocessing,
+	},
+	{
+		ChartTitle:       "Application RSS Overhead (% increase over baseline)",
+		MetricName:       "max_rss",
+		MetricsUsed:      []string{"rss"},
+		MetricSelectExpr: "r.rss",
+		TimeAgg:          "max(max_rss)",
+		Unit:             percent,
+	},
+}
+
+type experimentViewTemplateVals struct {
+	ChartTitle          string
+	MetricNames         []string
+	MetricExprs         []string
+	MetricsUsed         []string
+	Unit                metricUnit
+	CustomPreprocessing string
+
+	Project string
+	Dataset string
+}
+
+var experimentViews = []*experimentViewTemplateVals{
+	{
+		ChartTitle:          "CPU Usage",
+		MetricNames:         []string{"cpu_usage"},
+		MetricExprs:         []string{"r.cpu_usage"},
+		MetricsUsed:         []string{"cpu_seconds_counter", "cpu_usage"},
+		Unit:                percent,
+		CustomPreprocessing: cpuUsagePreprocessing,
+	},
+	// TODO(james): combine the memory stats into one chart.
+	{
+		ChartTitle:  "Heap (ignoring table store)",
+		MetricNames: []string{"heap_ex_table_store"},
+		MetricsUsed: []string{"heap_size_bytes", "table_size"},
+		MetricExprs: []string{"r.heap_size_bytes - r.table_size"},
+		Unit:        bytes,
+	},
+	{
+		ChartTitle:  "RSS",
+		MetricNames: []string{"rss"},
+		MetricsUsed: []string{"rss"},
+		MetricExprs: []string{"r.rss"},
+		Unit:        bytes,
+	},
+	{
+		ChartTitle:  "HTTP Data Loss",
+		MetricNames: []string{"http_data_loss"},
+		MetricsUsed: []string{"http_data_loss"},
+		MetricExprs: []string{"r.http_data_loss"},
+		Unit:        percent,
+	},
+}
+
+func executeTemplate(vals interface{}, templ string, outputPath string) error {
+	t, err := template.New("").Funcs(sprig.TxtFuncMap()).Parse(templ)
+	if err != nil {
+		return err
+	}
+	f, err := os.Create(outputPath)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	err = t.Execute(f, vals)
+	if err != nil {
+		return err
+	}
+	return nil
+}
diff --git a/src/e2e_test/perf_tool/datastudio/templates/charts/experiment/bytes.json b/src/e2e_test/perf_tool/datastudio/templates/charts/experiment/bytes.json
new file mode 100644
index 00000000000..c93b69b3a7d
--- /dev/null
+++ b/src/e2e_test/perf_tool/datastudio/templates/charts/experiment/bytes.json
@@ -0,0 +1,204 @@
+{
+  "$schema": "https://vega.github.io/schema/vega/v5.json",
+  "title": "{{.ChartTitle}}",
+  "data": [
+    {
+      "name": "default",
+      "transform": [
+        {
+          "type": "project",
+          "fields": [
+            "$dimension0",
+            "$dimension1",
+            "$dimension2",
+            "$dimension3",
+            "$dimension4",
+            "$dimension5",
+            "$dimension6",
+            "$metric0"
+          ],
+          "as": [
+            "experiment_id",
+            "pod",
+            "ts",
+            "action_name",
+            "action_type",
+            "action_begin",
+            "action_ts",
+            "bytes_value"
+          ]
+        }
+      ]
+    },
+    {
+      "name": "data",
+      "source": "default",
+      "transform": [
+        {
+          "type": "filter",
+          "expr": "isValid(datum.experiment_id) && isValid(datum.ts) && isValid(datum.bytes_value)"
+        },
+        {
+          "type": "collect",
+          "sort": {"field": "ts"}
+        },
+        {
+          "type": "formula",
+          "expr": "datum.bytes_value/1024/1024",
+          "as": "value_mb"
+        },
+        {
+          "type": "formula",
+          "expr": "(datum.pod)?datum.pod:\"\"",
+          "as": "pod"
+        },
+        {
+          "type": "formula",
+          "expr": "datum.experiment_id + datum.pod",
+          "as": "experiment_id_pod"
+        }
+      ]
+    },
+    {
+      "name": "actions",
+      "source": "default",
+      "transform": [
+        {
+          "type": "filter",
+          "expr": "isValid(datum.action_type) && isValid(datum.action_ts)"
+        },
+        {
+          "type": "filter",
+          "expr": "datum.action_type == 'run'"
+        }
+      ]
+    }
+  ],
+  "scales": [
+    {
+      "name": "seconds",
+      "type": "linear",
+      "range": "width",
+      "domain": {
+        "fields": [
+          {"data": "data", "field": "ts"},
+          {"data": "actions", "field": "action_ts"}
+        ]
+      }
+    },
+    {
+      "name": "bytes",
+      "type": "linear",
+      "range": "height",
+      "nice": true,
+      "domain": {"data": "data", "field": "value_mb"}
+    },
+    {
+      "name": "color",
+      "type": "ordinal",
+      "range": "category",
+      "domain": {
+        "data": "data",
+        "field": "experiment_id_pod",
+        "sort": true
+      }
+    }
+  ],
+  "axes": [
+    {
+      "orient": "bottom",
+      "scale": "seconds",
+      "title": "Time Elapsed (s)"
+    },
+    {
+      "orient": "left",
+      "scale": "bytes",
+      "title": "MiB"
+    }
+  ],
+  "marks": [
+    {
+      "type": "group",
+      "from": {
+        "facet": {
+          "name": "series",
+          "data": "data",
+          "groupby": "experiment_id_pod"
+        }
+      },
+      "marks": [
+        {
+          "type": "line",
+          "from": {"data": "series"},
+          "encode": {
+            "enter": {
+              "x": {"scale": "seconds", "field": "ts"},
+              "y": {"scale": "bytes", "field": "value_mb"},
+              "stroke": {"scale": "color", "field": "experiment_id_pod"},
+              "strokeWidth": {"value": 2}
+            }
+          }
+        },
+        {
+          "type": "line",
+          "from": {"data": "series"},
+          "encode": {
+            "enter": {
+              "x": {"scale": "seconds", "field": "ts"},
+              "y": {"scale": "bytes", "field": "value_mb"},
+              "strokeWidth": {"value": 10},
+              "strokeOpacity": {"value": 0},
+              "zindex": {"value": 99},
+              "tooltip": {"signal": "{\"Pod\":datum.pod, \"Value\":format(datum.value_mb, \".1f\"), \"Experiment ID\": slice(datum.experiment_id, 0, 8)+\"...\"}"}
+            }
+          }
+        }
+      ]
+    },
+    {
+      "type": "group",
+      "from": {
+        "facet": {
+          "name": "actions_series",
+          "data": "actions",
+          "groupby": "experiment_id"
+        }
+      },
+      "marks": [
+        {
+          "type": "rule",
+          "from": {"data": "actions_series"},
+          "encode": {
+            "enter": {
+              "x": {"scale": "seconds", "field": "action_ts"},
+              "x2": {"scale": "seconds", "field": "action_ts"},
+              "y": {"value": 0},
+              "y2": {"signal": "height"},
+              "strokeWidth": {"value": 1},
+              "stroke": [
+                {"test": "datum.action_begin", "value": "green"},
+                {"value": "red"}
+              ]
+            }
+          }
+        },
+        {
+          "type": "rule",
+          "from": {"data": "actions_series"},
+          "encode": {
+            "enter": {
+              "x": {"scale": "seconds", "field": "action_ts"},
+              "x2": {"scale": "seconds", "field": "action_ts"},
+              "y": {"value": 0},
+              "y2": {"signal": "height"},
+              "stroke": {"value": "transparent"},
+              "strokeWidth": {"value": 10},
+              "tooltip": {"signal": "{\"Experiment\": slice(datum.experiment_id, 0, 6) + \"...\", \"Action\": datum.action_type, \"Name\": datum.action_name, \"Time\": datum.action_ts}"},
+              "zindex": {"value": 100}
+            }
+          }
+        }
+      ]
+    }
+  ]
+}
diff --git a/src/e2e_test/perf_tool/datastudio/templates/charts/experiment/percent.json b/src/e2e_test/perf_tool/datastudio/templates/charts/experiment/percent.json
new file mode 100644
index 00000000000..26e0db62834
--- /dev/null
+++ b/src/e2e_test/perf_tool/datastudio/templates/charts/experiment/percent.json
@@ -0,0 +1,199 @@
+{
+  "$schema": "https://vega.github.io/schema/vega/v5.json",
+  "title": "{{.ChartTitle}}",
+  "data": [
+    {
+      "name": "default",
+      "transform": [
+        {
+          "type": "project",
+          "fields": [
+            "$dimension0",
+            "$dimension1",
+            "$dimension2",
+            "$dimension3",
+            "$dimension4",
+            "$dimension5",
+            "$dimension6",
+            "$metric0"
+          ],
+          "as": [
+            "experiment_id",
+            "pod",
+            "ts",
+            "action_name",
+            "action_type",
+            "action_begin",
+            "action_ts",
+            "percent_value"
+          ]
+        }
+      ]
+    },
+    {
+      "name": "data",
+      "source": "default",
+      "transform": [
+        {
+          "type": "filter",
+          "expr": "isValid(datum.experiment_id) && isValid(datum.ts) && isValid(datum.percent_value)"
+        },
+        {
+          "type": "collect",
+          "sort": {"field": "ts"}
+        },
+        {
+          "type": "formula",
+          "expr": "(datum.pod)?datum.pod:\"\"",
+          "as": "pod"
+        },
+        {
+          "type": "formula",
+          "expr": "datum.experiment_id + datum.pod",
+          "as": "experiment_id_pod"
+        }
+      ]
+    },
+    {
+      "name": "actions",
+      "source": "default",
+      "transform": [
+        {
+          "type": "filter",
+          "expr": "isValid(datum.action_type) && isValid(datum.action_ts)"
+        },
+        {
+          "type": "filter",
+          "expr": "datum.action_type == 'run'"
+        }
+      ]
+    }
+  ],
+  "scales": [
+    {
+      "name": "seconds",
+      "type": "linear",
+      "range": "width",
+      "domain": {
+        "fields": [
+          {"data": "data", "field": "ts"},
+          {"data": "actions", "field": "action_ts"}
+        ]
+      }
+    },
+    {
+      "name": "percent",
+      "type": "linear",
+      "range": "height",
+      "nice": true,
+      "domain": {"data": "data", "field": "percent_value"}
+    },
+    {
+      "name": "color",
+      "type": "ordinal",
+      "range": "category",
+      "domain": {
+        "data": "data",
+        "field": "experiment_id_pod",
+        "sort": true
+      }
+    }
+  ],
+  "axes": [
+    {
+      "orient": "bottom",
+      "scale": "seconds",
+      "title": "Time Elapsed (s)"
+    },
+    {
+      "orient": "left",
+      "scale": "percent",
+      "format": "%"
+    }
+  ],
+  "marks": [
+    {
+      "type": "group",
+      "from": {
+        "facet": {
+          "name": "series",
+          "data": "data",
+          "groupby": "experiment_id_pod"
+        }
+      },
+      "marks": [
+        {
+          "type": "line",
+          "from": {"data": "series"},
+          "encode": {
+            "enter": {
+              "x": {"scale": "seconds", "field": "ts"},
+              "y": {"scale": "percent", "field": "percent_value"},
+              "stroke": {"scale": "color", "field": "experiment_id_pod"},
+              "strokeWidth": {"value": 2}
+            }
+          }
+        },
+        {
+          "type": "line",
+          "from": {"data": "series"},
+          "encode": {
+            "enter": {
+              "x": {"scale": "seconds", "field": "ts"},
+              "y": {"scale": "percent", "field": "percent_value"},
+              "strokeWidth": {"value": 10},
+              "strokeOpacity": {"value": 0},
+              "zindex": {"value": 99},
+              "tooltip": {"signal": "{\"Pod\":datum.pod, \"Value\":format(datum.percent_value, \".4f\"), \"Experiment ID\": slice(datum.experiment_id, 0, 8)+\"...\"}"}
+            }
+          }
+        }
+      ]
+    },
+    {
+      "type": "group",
+      "from": {
+        "facet": {
+          "name": "actions_series",
+          "data": "actions",
+          "groupby": "experiment_id"
+        }
+      },
+      "marks": [
+        {
+          "type": "rule",
+          "from": {"data": "actions_series"},
+          "encode": {
+            "enter": {
+              "x": {"scale": "seconds", "field": "action_ts"},
+              "x2": {"scale": "seconds", "field": "action_ts"},
+              "y": {"value": 0},
+              "y2": {"signal": "height"},
+              "strokeWidth": {"value": 1},
+              "stroke": [
+                {"test": "datum.action_begin", "value": "green"},
+                {"value": "red"}
+              ]
+            }
+          }
+        },
+        {
+          "type": "rule",
+          "from": {"data": "actions_series"},
+          "encode": {
+            "enter": {
+              "x": {"scale": "seconds", "field": "action_ts"},
+              "x2": {"scale": "seconds", "field": "action_ts"},
+              "y": {"value": 0},
+              "y2": {"signal": "height"},
+              "stroke": {"value": "transparent"},
+              "strokeWidth": {"value": 10},
+              "tooltip": {"signal": "{\"Experiment\": slice(datum.experiment_id, 0, 6) + \"...\", \"Action\": datum.action_type, \"Name\": datum.action_name, \"Time\": datum.action_ts}"},
+              "zindex": {"value": 100}
+            }
+          }
+        }
+      ]
+    }
+  ]
+}
diff --git a/src/e2e_test/perf_tool/datastudio/templates/charts/suite/bytes.json b/src/e2e_test/perf_tool/datastudio/templates/charts/suite/bytes.json
new file mode 100644
index 00000000000..753c0bbd0a3
--- /dev/null
+++ b/src/e2e_test/perf_tool/datastudio/templates/charts/suite/bytes.json
@@ -0,0 +1,133 @@
+{
+  "$schema": "https://vega.github.io/schema/vega/v5.json",
+  "title": "{{.ChartTitle}}",
+  "data": [
+    {
+      "name": "default",
+      "transform": [
+        {
+          "type": "project",
+          "fields": [
+            "$dimension0",
+            "$dimension1",
+            "$dimension2",
+            "$dimension3",
+            "$dimension4",
+            "$dimension5",
+            "$dimension6",
+            "$metric0"
+          ],
+          "as": [
+            "suite",
+            "workload",
+            "parameters",
+            "commit_sha",
+            "pod",
+            "topo_order",
+            "experiment_link",
+            "median_byte_value"
+          ]
+        },
+        {
+          "type": "formula",
+          "expr": "datum.median_byte_value/1024/1024",
+          "as": "value_mb"
+        },
+        {
+          "type": "formula",
+          "expr": "(datum.pod)?datum.pod:\"\"",
+          "as": "pod"
+        },
+        {
+          "type": "formula",
+          "expr": "datum.suite+datum.workload+datum.parameters",
+          "as": "color_key"
+        }
+      ]
+    }
+  ],
+  "scales": [
+    {
+      "name": "commits",
+      "type": "band",
+      "domain": {
+        "data": "default",
+        "field": "commit_sha",
+        "sort": {"op": "mean", "field": "topo_order", "order": "ascending"}
+      },
+      "padding": 1,
+      "range": "width",
+      "round": true
+    },
+    {
+      "name": "bytes",
+      "range": "height",
+      "nice": true,
+      "padding": 1,
+      "domain": {"data": "default", "field": "value_mb"}
+    },
+    {
+      "name": "color",
+      "type": "ordinal",
+      "range": "category",
+      "domain": {
+        "data": "default",
+        "field": "color_key",
+        "sort": true
+      }
+    }
+  ],
+  "axes": [
+    {
+      "orient": "left",
+      "scale": "bytes",
+      "title": "MiB"
+    },
+    {
+      "orient": "bottom",
+      "scale": "commits",
+      "labelAngle": 90,
+      "labelAlign": "left",
+      "title": "Commits",
+      "encode": {
+        "labels": {
+          "interactive": true,
+          "update": {
+            "href": {
+              "signal": "\"https://github.com/pixie-io/pixie/commit/\" + datum.value"
+            },
+            "text": {"signal": "slice(datum.value, 0, 6)"}
+          }
+        }
+      }
+    }
+  ],
+
+  "marks": [
+    {
+      "type": "symbol",
+      "from": {"data": "default"},
+      "encode": {
+        "enter": {
+          "x": {"scale": "commits", "field": "commit_sha"},
+          "y": {
+            "scale": "bytes",
+            "field": "value_mb"
+          },
+          "fill": {
+            "scale": "color",
+            "field": "color_key"
+          },
+          "tooltip": {
+            "signal": "{\"Suite\": datum.suite, \"Workload\": datum.workload, \"Parameters\": datum.parameters, \"Pod\": datum.pod, \"Commit\": datum.commit_sha, \"Value\": datum.value_mb}"
+          }
+        },
+        "update": {
+          "href": {
+            "field": "experiment_link"
+          }
+        }
+      }
+    }
+  ]
+}
diff --git a/src/e2e_test/perf_tool/datastudio/templates/charts/suite/percent.json b/src/e2e_test/perf_tool/datastudio/templates/charts/suite/percent.json
new file mode 100644
index 00000000000..129ea2ce701
--- /dev/null
+++ b/src/e2e_test/perf_tool/datastudio/templates/charts/suite/percent.json
@@ -0,0 +1,132 @@
+{
+  "$schema": "https://vega.github.io/schema/vega/v5.json",
+  "title": "{{.ChartTitle}}",
+  "data": [
+    {
+      "name": "default",
+      "transform": [
+        {
+          "type": "project",
+          "fields": [
+            "$dimension0",
+            "$dimension1",
+            "$dimension2",
+            "$dimension3",
+            "$dimension4",
+            "$dimension5",
+            "$dimension6",
+            "$metric0"
+          ],
+          "as": [
+            "suite",
+            "workload",
+            "parameters",
+            "commit_sha",
+            "pod",
+            "topo_order",
+            "experiment_link",
+            "median_percent_value"
+          ]
+        },
+        {
+          "type": "formula",
+          "expr": "datum.suite+datum.workload+datum.parameters",
+          "as": "color_key"
+        },
+        {
+          "type": "formula",
+          "expr": "(datum.pod)?datum.pod:\"\"",
+          "as": "pod"
+        }
+      ]
+    }
+  ],
+
+  "scales": [
+    {
+      "name": "commits",
+      "type": "band",
+      "domain": {
+        "data": "default",
+        "field": "commit_sha",
+        "sort": {"op": "mean", "field": "topo_order", "order": "ascending"}
+      },
+      "padding": 1,
+      "range": "width",
+      "round": true
+    },
+    {
+      "name": "percent",
+      "range": "height",
+      "nice": true,
+      "padding": 1,
+      "domain": {"data": "default", "field": "median_percent_value"}
+    },
+    {
+      "name": "color",
+      "type": "ordinal",
+      "range": "category",
+      "domain": {
+        "data": "default",
+        "field": "color_key",
+        "sort": true
+      }
+    }
+  ],
+
+  "axes": [
+    {
+      "orient": "left",
+      "scale": "percent",
+      "title": ["%"],
+      "format": "%"
+    },
+    {
+      "orient": "bottom",
+      "scale": "commits",
+      "labels": true,
+      "labelAngle": 90,
+      "labelAlign": "left",
+      "title": "Commits",
+      "encode": {
+        "labels": {
+          "interactive": true,
+          "update": {
+            "href": {
+              "signal": "\"https://github.com/pixie-io/pixie/commit/\" + datum.value"
+            },
+            "text": {"signal": "slice(datum.value, 0, 6)"}
+          }
+        }
+      }
+    }
+  ],
+
+  "marks": [
+        {
+          "type": "symbol",
+          "from": {"data": "default"},
+          "encode": {
+            "enter": {
+              "x": {"scale": "commits", "field": "commit_sha"},
+              "y": {
+                "scale": "percent",
+                "field": "median_percent_value"
+              },
+              "fill": {
+                "scale": "color",
+                "field": "color_key"
+              },
+              "tooltip": {
+                "signal": "{\"Suite\": datum.suite, \"Workload\": datum.workload, \"Parameters\": datum.parameters, \"Pod\": datum.pod, \"Commit\": datum.commit_sha, \"Value\": datum.median_percent_value}"
+              }
+            },
+            "update": {
+              "href": {
+                "field": "experiment_link"
+              }
+            }
+          }
+        }
+  ]
+}
diff --git a/src/e2e_test/perf_tool/datastudio/templates/queries/all_suites_workloads_parameters.sql b/src/e2e_test/perf_tool/datastudio/templates/queries/all_suites_workloads_parameters.sql
new file mode 100644
index 00000000000..84d7fe1030d
--- /dev/null
+++ b/src/e2e_test/perf_tool/datastudio/templates/queries/all_suites_workloads_parameters.sql
@@ -0,0 +1,62 @@
+WITH json_unnest AS (
+  SELECT * FROM (
+    SELECT
+      experiment_id,
+      commit_topo_order,
+      spec,
+      json_value(spec, '$.commitSha') as commit_sha,
+      cast(json_value(spec, '$.clusterSpec.numNodes') as int64) as num_nodes,
+      json_value_array(spec, '$.tags') as tag_array
+    FROM `{{.Project}}.{{.Dataset}}.specs`
+  )
+  WHERE
+    num_nodes = 1 AND
+    ("main" IN UNNEST(tag_array))
+),
+specs_filtered AS (
+  SELECT * FROM (
+    SELECT
+      s1.experiment_id,
+      s1.commit_topo_order,
+      s1.commit_sha,
+      json_value_array(s1.spec, '$.tags') as tag_array,
+      count(distinct s2.commit_sha) as commits_ahead
+    FROM json_unnest as s1
+    LEFT JOIN json_unnest as s2
+      ON s2.commit_topo_order > s1.commit_topo_order
+    GROUP BY s1.experiment_id, s1.commit_topo_order, s1.commit_sha, s1.spec
+  )
+  WHERE
+    commits_ahead < (@num_commits * @show_every_n_commits)
+    AND mod(commits_ahead, @show_every_n_commits) = 0
+),
+suite_workload_and_parameters AS (
+  SELECT
+    experiment_id,
+    suite,
+    workload,
+    array_to_string(array_agg(parameter), ",") AS parameters,
+  FROM (
+    SELECT
+      experiment_id,
+      regexp_extract(tag1, r'^parameter[/](.+)') as parameter,
+      regexp_extract(tag2, r'^workload[/]([^/]+)') as workload,
+      regexp_extract(tag3, r'^suite[/]([^/]+)') as suite
+    FROM specs_filtered
+    CROSS JOIN UNNEST(specs_filtered.tag_array) tag1
+    CROSS JOIN UNNEST(specs_filtered.tag_array) tag2
+    CROSS JOIN UNNEST(specs_filtered.tag_array) tag3
+  )
+  WHERE
+    parameter is not null AND
+    workload is not null AND
+    suite is not null
+  GROUP BY experiment_id, workload, suite
+)
+SELECT
+  COUNT(*) as num_experiments,
+  suite,
+  workload,
+  parameters
+FROM suite_workload_and_parameters
+GROUP BY suite, workload, parameters
diff --git a/src/e2e_test/perf_tool/datastudio/templates/queries/cpu_usage_preprocessing.sql b/src/e2e_test/perf_tool/datastudio/templates/queries/cpu_usage_preprocessing.sql
new file mode 100644
index 00000000000..fe2951462cb
--- /dev/null
+++ b/src/e2e_test/perf_tool/datastudio/templates/queries/cpu_usage_preprocessing.sql
@@ -0,0 +1,19 @@
+preprocessed AS (
+  SELECT * FROM
+  (
+    SELECT
+      r1.experiment_id,
+      r1.timestamp,
+      r1.tags,
+      COALESCE(
+        r1.cpu_usage,
+        IF(r1.cpu_seconds_counter is NULL, NULL,
+          (min(r2.cpu_seconds_counter) - r1.cpu_seconds_counter) / TIMESTAMP_DIFF(min(r2.timestamp), r1.timestamp, SECOND))
+      ) AS cpu_usage
+    FROM pivoted_results AS r1
+    LEFT JOIN pivoted_results AS r2
+      ON r1.experiment_id = r2.experiment_id AND r1.tags = r2.tags AND r2.timestamp > r1.timestamp
+    GROUP BY r1.experiment_id, r1.timestamp, r1.tags, r1.cpu_usage, r1.cpu_seconds_counter
+  )
+  WHERE cpu_usage is not NULL
+)
diff --git a/src/e2e_test/perf_tool/datastudio/templates/queries/experiment_view.sql b/src/e2e_test/perf_tool/datastudio/templates/queries/experiment_view.sql
new file mode 100644
index 00000000000..041a7646b8c
--- /dev/null
+++ b/src/e2e_test/perf_tool/datastudio/templates/queries/experiment_view.sql
@@ -0,0 +1,173 @@
+WITH json_unnest AS (
+  SELECT
+    experiment_id,
+    json_value(spec, '$.commitSha') as commit_sha,
+    json_value_array(spec, '$.tags') as tag_array
+  FROM `{{.Project}}.{{.Dataset}}.specs`
+  WHERE experiment_id IN UNNEST(SPLIT(@experiment_ids, ","))
+),
+suite_workload_and_parameters AS (
+  SELECT
+    experiment_id,
+    commit_sha,
+    suite,
+    workload,
+    array_to_string(array_agg(parameter), ",") as parameters
+  FROM (
+    SELECT
+      experiment_id,
+      commit_sha,
+      regexp_extract(tag1, r'^parameter[/](.+)') as parameter,
+      regexp_extract(tag2, r'^workload[/]([^/]+)') as workload,
+      regexp_extract(tag3, r'^suite[/]([^/]+)') as suite
+    FROM json_unnest
+    CROSS JOIN UNNEST(json_unnest.tag_array) tag1
+    CROSS JOIN UNNEST(json_unnest.tag_array) tag2
+    CROSS JOIN UNNEST(json_unnest.tag_array) tag3
+  )
+  WHERE
+    parameter is not null AND
+    workload is not null AND
+    suite is not null
+  GROUP BY experiment_id, commit_sha, suite, workload
+),
+actions AS (
+  SELECT
+    r.experiment_id,
+    r.timestamp as action_ts,
+    IFNULL(REGEXP_EXTRACT(r.name, '[^:]+:(.*)'), '') as action_name,
+    IFNULL(REGEXP_EXTRACT(r.name, '[^:_]+_([^:]+):.*'), 'run') as action_type,
+    IFNULL(REGEXP_EXTRACT(r.name, '([^:_]+)_[^:]+:.*'), 'begin') = 'begin' as action_begin,
+    s.commit_sha,
+    s.parameters,
+    s.workload,
+    s.suite
+  FROM `{{.Project}}.{{.Dataset}}.results` AS r
+  JOIN suite_workload_and_parameters AS s USING (experiment_id)
+  WHERE r.name LIKE 'begin_%:%' OR r.name LIKE 'end_%:%' OR r.name = 'workloads_deployed'
+),
+pivoted_results AS (
+  SELECT *
+  FROM `{{.Project}}.{{.Dataset}}.results`
+  PIVOT(
+    any_value(value)
+    FOR name
+      IN (
+        {{- $n := len .MetricsUsed -}}
+        {{- range $index, $element := .MetricsUsed -}}
+          '{{$element}}'
+          {{- if add1 $index | ne $n -}}
+            ,
+          {{- end -}}
+        {{- end -}}
+      )
+  )
+  WHERE
+    {{$n := len .MetricsUsed -}}
+    {{- range $index, $element := .MetricsUsed}}
+      {{$element}} is not null
+      {{if add1 $index | ne $n -}}
+        OR
+      {{- end}}
+    {{- end}}
+),
+{{- if ne .CustomPreprocessing "" -}}
+{{.CustomPreprocessing}},
+{{else}}
+-- By default we require all of the MetricsUsed to be valid. So that eg. `heap_size_bytes - table_size` make sense.
+preprocessed AS (
+  SELECT *
+  FROM pivoted_results
+  WHERE
+    {{$n := len .MetricsUsed -}}
+    {{- range $index, $element := .MetricsUsed}}
+      {{$element}} is not null
+      {{if add1 $index | ne $n -}}
+        AND
+      {{- end}}
+    {{- end}}
+),
+{{- end}}
+all_results AS (
+  SELECT
+    r.experiment_id,
+    r.timestamp as ts,
+    {{range $idx, $name := .MetricNames -}}
+      {{index $.MetricExprs $idx}} AS {{$name}},
+    {{- end}}
+    json_value(r.tags, '$.pod') as pod,
+    s.commit_sha,
+    s.parameters,
+    s.workload,
+    s.suite
+  FROM preprocessed AS r
+  JOIN suite_workload_and_parameters AS s USING (experiment_id)
+),
+with_actions AS (
+  SELECT
+    experiment_id,
+    ts,
+    {{range $name := .MetricNames -}}
+      {{$name}},
+    {{- end}}
+    pod,
+    commit_sha,
+    parameters,
+    workload,
+    suite,
+    NULL as action_ts,
+    NULL as action_name,
+    NULL as action_type,
+    false as action_begin
+  FROM all_results
+  UNION ALL
+  SELECT
+    experiment_id,
+    NULL as ts,
+    {{range $name := .MetricNames -}}
+      NULL as {{$name}},
+    {{- end}}
+    NULL as pod,
+    commit_sha,
+    parameters,
+    workload,
+    suite,
+    action_ts,
+    action_name,
+    action_type,
+    action_begin
+  FROM actions
+),
+min_time_agg AS (
+  SELECT
+    experiment_id,
+    least(min(ts), min(action_ts)) as min_ts
+  FROM with_actions
+  GROUP BY experiment_id
+),
+with_min_time AS (
+  SELECT
+    r.*,
+    agg.min_ts
+  FROM with_actions AS r
+  LEFT JOIN min_time_agg AS agg USING (experiment_id)
+),
+relative_timestamps AS (
+  SELECT
+    experiment_id,
+    TIMESTAMP_DIFF(ts, min_ts, SECOND) AS ts,
+    {{range $name := .MetricNames -}}
+      {{$name}},
+    {{- end}}
+    pod,
+    commit_sha,
+    parameters,
+    workload,
+    suite,
+    TIMESTAMP_DIFF(action_ts, min_ts, SECOND) AS action_ts,
+    action_name,
+    action_type,
+    action_begin
+  FROM with_min_time
+)
+SELECT * FROM relative_timestamps
diff --git a/src/e2e_test/perf_tool/datastudio/templates/queries/suite_view.sql b/src/e2e_test/perf_tool/datastudio/templates/queries/suite_view.sql
new file mode 100644
index 00000000000..c65483a99c1
--- /dev/null
+++ b/src/e2e_test/perf_tool/datastudio/templates/queries/suite_view.sql
@@ -0,0 +1,251 @@
+WITH json_unnest AS (
+  SELECT * FROM (
+    SELECT
+      experiment_id,
+      commit_topo_order,
+      spec,
+      json_value(spec, '$.commitSha') as commit_sha,
+      cast(json_value(spec, '$.clusterSpec.numNodes') as int64) as num_nodes,
+      json_value_array(spec, '$.tags') as tag_array
+    FROM `{{.Project}}.{{.Dataset}}.specs`
+  )
+  WHERE
+    num_nodes = 1 AND
+    ("main" IN UNNEST(tag_array)) AND
+    ("application_overhead" NOT IN UNNEST(tag_array))
+),
+specs_filtered AS (
+  SELECT * FROM (
+    SELECT
+      s1.experiment_id,
+      s1.commit_topo_order,
+      s1.commit_sha,
+      json_value_array(s1.spec, '$.tags') as tag_array,
+      count(distinct s2.commit_sha) as commits_ahead
+    FROM json_unnest as s1
+    LEFT JOIN json_unnest as s2
+      ON s2.commit_topo_order > s1.commit_topo_order
+    GROUP BY s1.experiment_id, s1.commit_topo_order, s1.commit_sha, s1.spec
+  )
+  WHERE
+    commits_ahead < (@num_commits * @show_every_n_commits)
+    AND mod(commits_ahead, @show_every_n_commits) = 0
+),
+suite_workload_and_parameters AS (
+  SELECT
+    experiment_id,
+    commit_topo_order,
+    commit_sha,
+    suite,
+    workload,
+    array_to_string(array_agg(parameter), ",") AS parameters,
+  FROM (
+    SELECT
+      experiment_id,
+      commit_topo_order,
+      commit_sha,
+      regexp_extract(tag1, r'^parameter[/](.+)') as parameter,
+      regexp_extract(tag2, r'^workload[/]([^/]+)') as workload,
+      regexp_extract(tag3, r'^suite[/]([^/]+)') as suite
+    FROM specs_filtered
+    CROSS JOIN UNNEST(specs_filtered.tag_array) tag1
+    CROSS JOIN UNNEST(specs_filtered.tag_array) tag2
+    CROSS JOIN UNNEST(specs_filtered.tag_array) tag3
+  )
+  WHERE
+    parameter is not null AND
+    workload is not null AND
+    suite is not null
+  GROUP BY experiment_id, commit_sha, workload, suite, commit_topo_order
+),
+all_actions AS (
+  SELECT
+    r.experiment_id,
+    r.timestamp as action_ts,
+    REGEXP_EXTRACT(r.name, '[^:]+:(.*)') as action_name,
+    REGEXP_EXTRACT(r.name, '[^:_]+_([^:]+):.*') as action_type,
+    REGEXP_EXTRACT(r.name, '([^:_]+)_[^:]+:.*') = 'begin' as action_begin,
+  FROM `{{.Project}}.{{.Dataset}}.results` AS r
+  JOIN suite_workload_and_parameters AS s USING (experiment_id)
+  WHERE r.name LIKE 'begin_%:%' OR r.name LIKE 'end_%:%'
+),
+run_begin_end AS (
+  SELECT
+    a1.experiment_id,
+    min(a1.action_ts) AS run_begin_ts,
+    min(a2.action_ts) AS run_end_ts,
+  FROM all_actions AS a1
+  JOIN all_actions AS a2 USING (experiment_id)
+  WHERE
+    a1.action_type = 'run' AND
+    a1.action_begin AND
+    a2.action_type = 'run' AND
+    NOT a2.action_begin
+  GROUP BY a1.experiment_id
+  UNION ALL
+  SELECT
+    experiment_id,
+    timestamp AS run_begin_ts,
+    -- for legacy experiments that used `workloads_deployed`, set the end time as 1 year after the start.
+    -- this is only used to limit results, so it doesn't matter as long as no experiments go for longer than a year.
+    TIMESTAMP_ADD(timestamp, INTERVAL 365 DAY) AS run_end_ts
+  FROM `{{.Project}}.{{.Dataset}}.results`
+  WHERE name = 'workloads_deployed'
+),
+pivoted_results AS (
+  SELECT *
+  FROM `{{.Project}}.{{.Dataset}}.results`
+  PIVOT(
+    any_value(value)
+    FOR name
+      IN (
+        {{- $n := len .MetricsUsed -}}
+        {{- range $index, $element := .MetricsUsed -}}
+          '{{$element}}'
+          {{- if add1 $index | ne $n -}}
+            ,
+          {{- end -}}
+        {{- end -}}
+      )
+  )
+  WHERE {{$n := len .MetricsUsed -}}
+    {{- range $index, $element := .MetricsUsed}}
+      {{$element}} is not null
+      {{if add1 $index | ne $n -}}
+        OR
+      {{- end -}}
+    {{- end}}
+),
+{{- if ne .CustomPreprocessing "" -}}
+{{.CustomPreprocessing}},
+{{else}}
+-- By default we require all of the MetricsUsed to be valid. So that eg. `heap_size_bytes - table_size` make sense.
+preprocessed AS (
+  SELECT *
+  FROM pivoted_results
+  WHERE
+    {{$n := len .MetricsUsed -}}
+    {{- range $index, $element := .MetricsUsed}}
+      {{$element}} is not null
+      {{if add1 $index | ne $n -}}
+        AND
+      {{- end}}
+    {{- end}}
+),
+{{- end}}
+joined_results AS (
+  SELECT
+    r.experiment_id,
+    r.timestamp as ts,
+    {{.MetricSelectExpr}} AS {{.MetricName}},
+    rtrim(
+      regexp_extract(
+        json_value(r.tags, '$.pod'),
+        r'^pl[/]((?:[a-z]+\-)+)(?:\-?(?:[a-z]+[0-9]|[0-9]+[a-z])[a-z0-9]*)*?'
+      ),
+      "-"
+    ) as pod,
+    s.commit_sha,
+    s.commit_topo_order,
+    s.parameters,
+    s.workload,
+    s.suite
+  FROM preprocessed AS r
+  JOIN suite_workload_and_parameters AS s USING (experiment_id)
+),
+with_begin_end AS (
+  SELECT
+    r.experiment_id,
+    r.ts,
+    r.{{.MetricName}},
+    r.pod,
+    r.commit_sha,
+    r.commit_topo_order,
+    r.parameters,
+    r.workload,
+    r.suite,
+    begin_end.run_begin_ts,
+    begin_end.run_end_ts,
+  FROM joined_results AS r
+  LEFT JOIN run_begin_end AS begin_end USING (experiment_id)
+),
+min_time_agg AS (
+  SELECT
+    experiment_id,
+    min(ts) as min_ts
+  FROM with_begin_end
+  GROUP BY experiment_id
+),
+with_min_time AS (
+  SELECT
+    r.experiment_id,
+    r.ts,
+    r.{{.MetricName}},
+    r.pod,
+    r.commit_sha,
+    r.commit_topo_order,
+    r.parameters,
+    r.workload,
+    r.suite,
+    r.run_begin_ts,
+    r.run_end_ts,
+    agg.min_ts
+  FROM with_begin_end AS r
+  LEFT JOIN min_time_agg AS agg USING (experiment_id)
+),
+ignore_burnin AS (
+  SELECT *
+  FROM with_min_time
+  WHERE TIMESTAMP_DIFF(ts, run_begin_ts, SECOND) >= 0 AND
+    TIMESTAMP_DIFF(run_end_ts, ts, SECOND) >= 0
+),
+{{- if ne .TimeAgg "" -}}
+time_agg AS (
+  SELECT
+    experiment_id,
+    suite,
+    workload,
+    parameters,
+    commit_sha,
+    commit_topo_order,
+    pod,
+    {{.TimeAgg}} as {{.MetricName}}
+  FROM ignore_burnin
+  GROUP BY experiment_id, suite, workload, parameters, commit_sha, commit_topo_order, pod
+),
+{{- end -}}
+agged AS (
+  SELECT
+    COUNT(DISTINCT experiment_id) as experiment_count,
+    array_agg(DISTINCT experiment_id) as experiment_ids,
+    suite,
+    workload,
+    parameters,
+    commit_sha,
+    commit_topo_order,
+    pod,
+    approx_quantiles({{.MetricName}}, 100) as quantiles
+  FROM {{if ne .TimeAgg ""}}time_agg{{else}}ignore_burnin{{end}}
+  GROUP BY suite, workload, parameters, commit_sha, commit_topo_order, pod
+)
+SELECT
+  experiment_count,
+  experiment_ids,
+  (
+    -- the params have to be URL encoded, so this is a bit ugly.
+    'https://datastudio.google.com/reporting/{{.DSReportID}}/page/{{.DSExperimentPageID}}?params=%7B%22experiment_ids%22%3A%22'
+    || array_to_string(experiment_ids, "%2C")
+    ||'%22%7D'
+  ) AS experiment_link,
+  suite,
+  workload,
+  parameters,
+  commit_sha,
+  commit_topo_order,
+  pod,
+  quantiles[OFFSET(1)] AS {{.MetricName}}_p01,
+  quantiles[OFFSET(25)] AS {{.MetricName}}_p25,
+  quantiles[OFFSET(50)] AS {{.MetricName}}_p50,
+  quantiles[OFFSET(75)] AS {{.MetricName}}_p75,
+  quantiles[OFFSET(99)] AS {{.MetricName}}_p99
+FROM agged
diff --git a/src/e2e_test/perf_tool/datastudio/templates/queries/suite_view_app_overhead.sql b/src/e2e_test/perf_tool/datastudio/templates/queries/suite_view_app_overhead.sql
new file mode 100644
index 00000000000..ad31ba4af6c
--- /dev/null
+++ b/src/e2e_test/perf_tool/datastudio/templates/queries/suite_view_app_overhead.sql
@@ -0,0 +1,269 @@
+WITH json_unnest AS (
+  SELECT * FROM (
+    SELECT
+      experiment_id,
+      commit_topo_order,
+      spec,
+      json_value(spec, '$.commitSha') as commit_sha,
+      cast(json_value(spec, '$.clusterSpec.numNodes') as int64) as num_nodes,
+      json_value_array(spec, '$.tags') as tag_array
+    FROM `{{.Project}}.{{.Dataset}}.specs`
+  )
+  WHERE
+    num_nodes = 1 AND
+    ("main" IN UNNEST(tag_array)) AND
+    ("application_overhead" IN UNNEST(tag_array))
+),
+specs_filtered AS (
+  SELECT * FROM (
+    SELECT
+      s1.experiment_id,
+      s1.commit_topo_order,
+      s1.commit_sha,
+      json_value_array(s1.spec, '$.tags') as tag_array,
+      count(distinct s2.commit_sha) as commits_ahead
+    FROM json_unnest as s1
+    LEFT JOIN json_unnest as s2
+      ON s2.commit_topo_order > s1.commit_topo_order
+    GROUP BY s1.experiment_id, s1.commit_topo_order, s1.commit_sha, s1.spec
+  )
+  WHERE
+    commits_ahead < (@num_commits * @show_every_n_commits)
+    AND mod(commits_ahead, @show_every_n_commits) = 0
+),
+suite_workload_and_parameters AS (
+  SELECT
+    experiment_id,
+    commit_topo_order,
+    commit_sha,
+    suite,
+    workload,
+    array_to_string(array_agg(parameter), ",") AS parameters,
+  FROM (
+    SELECT
+      experiment_id,
+      commit_topo_order,
+      commit_sha,
+      regexp_extract(tag1, r'^parameter[/](.+)') as parameter,
+      regexp_extract(tag2, r'^workload[/]([^/]+)') as workload,
+      regexp_extract(tag3, r'^suite[/]([^/]+)') as suite
+    FROM specs_filtered
+    CROSS JOIN UNNEST(specs_filtered.tag_array) tag1
+    CROSS JOIN UNNEST(specs_filtered.tag_array) tag2
+    CROSS JOIN UNNEST(specs_filtered.tag_array) tag3
+  )
+  WHERE
+    parameter is not null AND
+    workload is not null AND
+    suite is not null
+  GROUP BY experiment_id, commit_sha, workload, suite, commit_topo_order
+),
+all_actions AS (
+  SELECT
+    r.experiment_id,
+    r.timestamp as action_ts,
+    REGEXP_EXTRACT(r.name, '[^:]+:(.*)') as action_name,
+    REGEXP_EXTRACT(r.name, '[^:_]+_([^:]+):.*') as action_type,
+    REGEXP_EXTRACT(r.name, '([^:_]+)_[^:]+:.*') = 'begin' as action_begin,
+  FROM `{{.Project}}.{{.Dataset}}.results` AS r
+  JOIN suite_workload_and_parameters AS s USING (experiment_id)
+  WHERE r.name LIKE 'begin_%:%' OR r.name LIKE 'end_%:%'
+),
+running_timestamps AS (
+  SELECT
+    a1.experiment_id,
+    min(a1.action_ts) AS no_vizier_begin_ts,
+    min(a2.action_ts) AS no_vizier_end_ts,
+    min(a3.action_ts) AS with_vizier_begin_ts,
+    min(a4.action_ts) AS with_vizier_end_ts
+  FROM all_actions AS a1
+  JOIN all_actions AS a2 USING (experiment_id)
+  JOIN all_actions AS a3 USING (experiment_id)
+  JOIN all_actions AS a4 USING (experiment_id)
+  WHERE
+    a1.action_type = 'run' AND
+    a1.action_name = 'no_vizier' AND
+    a1.action_begin AND
+    a2.action_type = 'run' AND
+    a2.action_name = 'no_vizier' AND
+    NOT a2.action_begin AND
+    a3.action_type = 'run' AND
+    a3.action_name = 'with_vizier' AND
+    a3.action_begin AND
+    a4.action_type = 'run' AND
+    a4.action_name = 'with_vizier' AND
+    NOT a4.action_begin
+  GROUP BY a1.experiment_id
+),
+pivoted_results AS (
+  SELECT *
+  FROM `{{.Project}}.{{.Dataset}}.results`
+  PIVOT(
+    any_value(value)
+    FOR name
+      IN (
+        {{- $n := len .MetricsUsed -}}
+        {{- range $index, $element := .MetricsUsed -}}
+          '{{$element}}'
+          {{- if add1 $index | ne $n -}}
+            ,
+          {{- end -}}
+        {{- end -}}
+      )
+  )
+  WHERE {{$n := len .MetricsUsed -}}
+    {{- range $index, $element := .MetricsUsed}}
+      {{$element}} is not null
+      {{if add1 $index | ne $n -}}
+        OR
+      {{- end -}}
+    {{- end}}
+),
+{{- if ne .CustomPreprocessing "" -}}
+{{.CustomPreprocessing}},
+{{else}}
+-- By default we require all of the MetricsUsed to be valid. So that eg. `heap_size_bytes - table_size` make sense.
+preprocessed AS (
+  SELECT *
+  FROM pivoted_results
+  WHERE
+    {{$n := len .MetricsUsed -}}
+    {{- range $index, $element := .MetricsUsed}}
+      {{$element}} is not null
+      {{if add1 $index | ne $n -}}
+        AND
+      {{- end}}
+    {{- end}}
+),
+{{- end}}
+joined_results AS (
+  SELECT
+    r.experiment_id,
+    r.timestamp as ts,
+    {{.MetricSelectExpr}} AS {{.MetricName}},
+    rtrim(
+      regexp_extract(
+        json_value(r.tags, '$.pod'),
+        r'^(?:[a-z\-]+[/])?((?:[a-z]+\-)+)(?:\-?(?:[a-z]+[0-9]|[0-9]+[a-z])[a-z0-9]*)*?'
+      ),
+      "-"
+    ) as pod,
+    s.commit_sha,
+    s.commit_topo_order,
+    s.parameters,
+    s.workload,
+    s.suite
+  FROM preprocessed AS r
+  JOIN suite_workload_and_parameters AS s USING (experiment_id)
+),
+with_running_timestamps AS (
+  SELECT
+    r.experiment_id,
+    r.ts,
+    r.{{.MetricName}},
+    r.pod,
+    r.commit_sha,
+    r.commit_topo_order,
+    r.parameters,
+    r.workload,
+    r.suite,
+    (
+      TIMESTAMP_DIFF(r.ts, t.no_vizier_begin_ts, SECOND) >=0 AND
+      TIMESTAMP_DIFF(t.no_vizier_end_ts, r.ts, SECOND) >= 0
+    ) AS no_vizier,
+    (
+      TIMESTAMP_DIFF(r.ts, t.with_vizier_begin_ts, SECOND) >=0 AND
+      TIMESTAMP_DIFF(t.with_vizier_end_ts, r.ts, SECOND) >= 0
+    ) AS with_vizier,
+    t.with_vizier_begin_ts,
+    t.with_vizier_end_ts
+  FROM joined_results AS r
+  LEFT JOIN running_timestamps AS t USING (experiment_id)
+),
+min_time_agg AS (
+  SELECT
+    experiment_id,
+    min(ts) as min_ts
+  FROM with_running_timestamps
+  GROUP BY experiment_id
+),
+with_min_time AS (
+  SELECT
+    r.*,
+    agg.min_ts
+  FROM with_running_timestamps AS r
+  LEFT JOIN min_time_agg AS agg USING (experiment_id)
+),
+time_agg AS (
+  SELECT
+    experiment_id,
+    suite,
+    workload,
+    parameters,
+    commit_sha,
+    commit_topo_order,
+    pod,
+    no_vizier,
+    with_vizier,
+    {{.TimeAgg}} as {{.MetricName}}
+  FROM with_min_time
+  GROUP BY experiment_id, suite, workload, parameters, commit_sha, commit_topo_order, pod, no_vizier, with_vizier
+),
+no_vizier_results AS (
+  SELECT *
+  FROM time_agg
+  WHERE no_vizier
+),
+with_vizier_results AS (
+  SELECT *
+  FROM time_agg
+  WHERE with_vizier
+),
+overhead AS (
+  SELECT
+    no_viz.experiment_id,
+    no_viz.suite,
+    no_viz.workload,
+    no_viz.parameters,
+    no_viz.commit_sha,
+    no_viz.commit_topo_order,
+    no_viz.pod,
+    (with_viz.{{.MetricName}} - no_viz.{{.MetricName}}) / no_viz.{{.MetricName}} AS overhead_percent
+  FROM no_vizier_results as no_viz
+  JOIN with_vizier_results as with_viz USING (experiment_id, suite, workload, parameters, commit_sha, commit_topo_order, pod)
+),
+agged AS (
+  SELECT
+    COUNT(DISTINCT experiment_id) as experiment_count,
+    array_agg(DISTINCT experiment_id) as experiment_ids,
+    suite,
+    workload,
+    parameters,
+    commit_sha,
+    commit_topo_order,
+    pod,
+    approx_quantiles(overhead_percent, 100) as quantiles
+  FROM overhead
+  GROUP BY suite, workload, parameters, commit_sha, commit_topo_order, pod
+)
+SELECT
+  experiment_count,
+  experiment_ids,
+  (
+    -- the params have to be URL encoded, so this is a bit ugly.
+    'https://datastudio.google.com/reporting/{{.DSReportID}}/page/{{.DSExperimentPageID}}?params=%7B%22experiment_ids%22%3A%22'
+    || array_to_string(experiment_ids, "%2C")
+    ||'%22%7D'
+  ) AS experiment_link,
+  suite,
+  workload,
+  parameters,
+  commit_sha,
+  commit_topo_order,
+  pod,
+  quantiles[OFFSET(1)] AS overhead_percent_p01,
+  quantiles[OFFSET(25)] AS overhead_percent_p25,
+  quantiles[OFFSET(50)] AS overhead_percent_p50,
+  quantiles[OFFSET(75)] AS overhead_percent_p75,
+  quantiles[OFFSET(99)] AS overhead_percent_p99
+FROM agged