From 3b86e5c37a041610ffdb84519db14cde4da140a0 Mon Sep 17 00:00:00 2001 From: Rory McKinley Date: Wed, 11 Sep 2024 08:36:06 +0200 Subject: [PATCH] Basic performance testing utilities (#2459) * Basic performance testing utilities --------- Co-authored-by: Stuart Corbishley --- CHANGELOG.md | 3 + config/test.exs | 12 +- test/support/performance_analysis.exs | 200 ++++++++++++++++++++++++++ 3 files changed, 214 insertions(+), 1 deletion(-) create mode 100644 test/support/performance_analysis.exs diff --git a/CHANGELOG.md b/CHANGELOG.md index da252dc8a1..7b8fc22d4c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,9 @@ and this project adheres to ### Added +- Add utility module to seed a DB to support query performance analysis. + [#2441](https://github.com/OpenFn/lightning/issues/2441) + ### Changed ### Fixed diff --git a/config/test.exs b/config/test.exs index 438170e67f..1300902c31 100644 --- a/config/test.exs +++ b/config/test.exs @@ -20,6 +20,15 @@ config :tesla, Lightning.UsageTracking.GithubClient, adapter: Tesla.Mock # to provide built-in test partitioning in CI environment. # Run `mix help test` for more information. # + +# If we are running a performance test, set the ownership timeout to infinity. +ownership_timeout = + if System.get_env("PERFORMANCE_TEST") do + :infinity + else + 120_000 + end + # On certain machines we get db queue timeouts, so we raise `queue_target` # from 50 to 100 to give the DBConnection some room to respond. 
config :lightning, Lightning.Repo, @@ -29,7 +38,8 @@ config :lightning, Lightning.Repo, database: "lightning_test#{System.get_env("MIX_TEST_PARTITION")}", pool: Ecto.Adapters.SQL.Sandbox, pool_size: 15, - queue_target: 100 + queue_target: 100, + ownership_timeout: ownership_timeout config :lightning, Lightning.Vault, primary_encryption_key: "M1zzWU6Ego6jV/FUS7e/sj7yF9kRIutgR8uLQ9czrVc=" diff --git a/test/support/performance_analysis.exs b/test/support/performance_analysis.exs new file mode 100644 index 0000000000..93b4275ad6 --- /dev/null +++ b/test/support/performance_analysis.exs @@ -0,0 +1,200 @@ +defmodule Lightning.PerformanceAnalysis do + @moduledoc """ + This module can be used to load a large number of Runs into the database for + the purposes of evaluating query performance. The module also provides a + convenience method to dump query SQL to files for the purposes of comparison + as well as determining query execution plans. + + This module uses Lightning.Factories to generate the data so it must be run + in the `test` environment. In addition it requires the + `PERFORMANCE_TEST` ENV variable to be set to ensure that the process does not + run afoul of `ownership_timeout` restrictions. + + Usage: + + ``` + MIX_ENV=test PERFORMANCE_TEST=yes iex -S mix run test/support/performance_analysis.exs + ``` + """ + import Lightning.Factories + + alias Lightning.Repo + + @doc """ + Populate a fixed number of runs into the DB. The runs include a mix of + states, with the current ratio of 'final' to 'inprogress' being 4:1. One + percent of the inprogress runs are priority '0'. 
+ + Usage: + + ``` + iex(1)> Lightning.PerformanceAnalysis.populate() + :ok + ``` + """ + def populate do + no_of_projects = 10 + no_of_processing_runs_per_project = 1000 + no_of_finalised_runs_per_project = 4 * no_of_processing_runs_per_project + + insert_list(no_of_projects, :project) + |> Enum.each(fn project -> + workflow = insert(:simple_workflow, project: project) + + 1..no_of_finalised_runs_per_project + |> Enum.each(fn counter -> + insert_finalised_record_set(workflow, project, counter) + end) + + 1..no_of_processing_runs_per_project + |> Enum.each(fn counter -> + insert_processing_record_set( + workflow, + project, + counter + no_of_finalised_runs_per_project + ) + end) + end) + end + + @doc """ + This will generate two sql files that can be used to debug and measure + query changes (in addition to the unit tests). + + The first file contains just + the resulting SQL and can be used to output the results into a text file + for before/after comparisons. + + The second file prepends the SQL with the necessary EXPLAIN directive that + can be used to generate an execution plan suitable for uploading to + https://explain.dalibo.com/. + + The names of the files are based on the `output_name` parameter. If the + `output_name` is set to `blah` then the generated files will be named + `blah.sql` and `explain_blah.sql`. 
+ + Usage: + + ``` + alias Lightning.Runs.Query + Query.eligible_for_claim() |> Lightning.PerformanceAnalysis.dump_to_sql("blah") + ``` + + """ + def dump_to_sql(query, output_name, db_name \\ "lightning_test") do + {raw_query, _} = Repo.to_sql(:all, query) + + file_name = "#{output_name}.sql" + explain_file_name = "explain_#{output_name}.sql" + results_file_name = "explain_#{output_name}_results.json" + eol = end_of_line() + + File.write!(file_name, "#{raw_query}#{eol}") + + explain_query = + "EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) #{raw_query}" + + comment = + "/* psql -XqAt -f #{explain_file_name} #{db_name} > #{results_file_name} */" + + File.write!( + explain_file_name, + "#{comment}#{eol}#{explain_query}#{eol}" + ) + end + + defp get_finalised_state do + # Based on stats on prod - approx 87% success, 10% failed, and the balance + # of states covered in the remaining 3%. Jimmied these numbers a bit so that + # I can represent all states within a range of 1 to 100. + 1..100 + |> Enum.take_random(1) + |> case do + [100] -> :killed + [99] -> :exception + [98] -> :lost + [num] when num >= 96 -> :crashed + [num] when num >= 87 -> :failed + _ -> :success + end + end + + defp get_processing_state do + 1..100 + |> Enum.take_random(1) + |> case do + [num] when num >= 96 -> :started + [num] when num >= 91 -> :claimed + _ -> :available + end + end + + defp set_priority do + 1..100 + |> Enum.take_random(1) + |> case do + [1] -> 0 + _ -> 1 + end + end + + defp insert_finalised_record_set(workflow, project, counter) do + priority = 1 + + workflow + |> setup_dataclip_workorder_run( + project, + get_finalised_state(), + priority, + counter + ) + end + + defp insert_processing_record_set(workflow, project, counter) do + workflow + |> setup_dataclip_workorder_run( + project, + get_processing_state(), + set_priority(), + counter + ) + end + + defp setup_dataclip_workorder_run( + workflow, + project, + state, + priority, + counter + ) do + %{triggers: [trigger]} = 
workflow + + dataclip = insert(:dataclip, project: project) + + snapshot = insert(:snapshot, workflow: workflow, lock_version: counter) + + wo = + insert(:workorder, + workflow: workflow, + snapshot: snapshot, + trigger: trigger, + dataclip: dataclip + ) + + insert(:run, + work_order: wo, + dataclip: dataclip, + starting_trigger: trigger, + state: state, + priority: priority, + snapshot: snapshot + ) + end + + defp end_of_line do + case :os.type() do + {:win32, _} -> "\r\n" + _ -> "\n" + end + end +end