From f5e9b4cbca249296391e1c4fdf1272138c63c6ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dawid=20Niezg=C3=B3dka?= <dawid.niezgodka@gmail.com>
Date: Mon, 22 Jan 2024 17:35:26 +0100
Subject: [PATCH] Add comment body for jump detection evaluation

---
 .github/workflows/test.yml           | 36 ++++++-----
 dist/index.js                        | 90 ++++++++++++++++++++++++----
 resultsFolder/result_wg-medium2.json | 39 ------------
 resultsFolder/result_wg-medium3.json | 39 ------------
 resultsFolder/result_wg-medium4.json | 39 ------------
 src/bench.js                         |  1 -
 src/bench_data.js                    |  6 --
 src/comment.js                       | 79 +++++++++++++++++++++++-
 src/main.js                          |  4 +-
 9 files changed, 175 insertions(+), 158 deletions(-)
 delete mode 100644 resultsFolder/result_wg-medium2.json
 delete mode 100644 resultsFolder/result_wg-medium3.json
 delete mode 100644 resultsFolder/result_wg-medium4.json

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 9994df0a..5f4a76a9 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -5,6 +5,7 @@ on:
   push:
     branches:
       - main
+      - missing_eval
 
 jobs:
   test:
@@ -22,7 +23,7 @@ jobs:
           sparse-checkout-cone-mode: false
           path: 'bench_data'
 
-      - run: tree
+#      - run: tree
 
       - name: Evaluate benchmark
         uses: ./
@@ -33,26 +34,23 @@ jobs:
           file_with_bench_data: data.json
           folder_with_current_benchmark_results: resultsFolder
           bench_group_name: "MongoDB Benchmark"
-          evaluation_method: "threshold"
-          threshold_values: 2500,700,50000,5000,30000
-          comparison_operators: tolerance,tolerance,tolerance,tolerance,tolerance
-          comparison_margins: 10,10,10,10,10
-          result_files_merge_strategy_for_each_metric: "sum, average, average, average, average"
+          evaluation_method: "jump_detection"
+          jump_detection_thresholds: "10, 10, 10, 10, 10"
           save_curr_bench_res: true
           github_token: ${{ secrets.GH_TOKEN }}
           github_context: ${{ toJson(github) }}
 
-      - name: Commit files
-        working-directory: ./bench_data
-        run: |
-          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
-          git config --local user.name "github-actions[bot]"
-          git commit -a -m "Add changes"
-          git push origin bench_data
-
-      - name: Check output and fail if needed
-        if: steps.eval.outputs.should_fail == 'true'
-        run: |
-          echo "Failing as indicated by evaluation action"
-          exit 1
+#      - name: Commit files
+#        working-directory: ./bench_data
+#        run: |
+#          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
+#          git config --local user.name "github-actions[bot]"
+#          git commit -a -m "Add changes"
+#          git push origin bench_data
+#
+#      - name: Check output and fail if needed
+#        if: steps.eval.outputs.should_fail == 'true'
+#        run: |
+#          echo "Failing as indicated by evaluation action"
+#          exit 1
 
diff --git a/dist/index.js b/dist/index.js
index 1aa6fa69..840dc85f 100644
--- a/dist/index.js
+++ b/dist/index.js
@@ -30073,7 +30073,6 @@ module.exports.createCurrBench = function (config) {
   }).filter(result => result !== null);
 
   let commit;
-  // if config.eventName === schedule, then we will not have
   if (config.eventName === 'schedule') {
     core.info('The workflow was triggered by a scheduled event.');
     commit = getCommitReplacementWhenTriggeredByScheduledEvent(config.runId);
@@ -30125,7 +30124,6 @@ module.exports.addCompleteBenchmarkToFile = async (
     core.debug(`Reading file at ${pathToPreviousDataFile}`)
     try {
       const data = await fs.readFile(pathToPreviousDataFile, 'utf8')
-      //core.debug('Read file: ' + data) // -> can be very long...
       jsonData = JSON.parse(data)
     } catch (err) {
       core.debug(
@@ -30328,13 +30326,9 @@ module.exports.getBenchFromWeekAgo = function (
   );
 
   let benchmarks = data.entries[benchmarkGroupToCompare];
-  // Print the amount of benchmarks
-
   let closestBenchmark = null;
   let smallestDifference = Infinity;
 
-
-
   benchmarks.forEach(benchmark => {
     let difference = Math.abs(now - benchmark.date - ONE_WEEK_IN_MS);
     if (difference < smallestDifference) {
@@ -30397,7 +30391,6 @@ module.exports.getBenchmarkOfStableBranch = function (benchmarkGroupToCompare, f
         folderWithBenchData, fileNameWithBenchData
     );
   let benchmarks = data.entries[benchmarkGroupToCompare];
-  // find benchmark with commit sha == latestBenchSha
   let benchmark = benchmarks.find(benchmark => benchmark.commit.id === latestBenchSha);
   core.debug(`Benchmark of stable branch: ${JSON.stringify(benchmark)}`);
 
@@ -30438,13 +30431,13 @@ module.exports.createComment = function (
       commentBody = module.exports.createBodyForComparisonWithPrev(evaluationResult, completeConfig);
       break;
     case 'previous_successful':
-      commentBody = module.exports.createBodyForComparisonWithPrevSucc(evaluationResult, completeConfig);
+      commentBody = module.exports.createBodyForComparisonWithPrev(evaluationResult, completeConfig);
       break;
     case 'threshold_range':
       commentBody = module.exports.createBodyForComparisonWithThresholdRange(evaluationResult, completeConfig);
       break;
     case 'jump_detection':
-      commentBody = module.exports.createBodyForComparisonWithJumpDet(evaluationResult, completeConfig);
+      commentBody = module.exports.createBodyForComparisonWithJumpDeltas(evaluationResult, completeConfig);
       break;
     case 'trend_detection_moving_ave':
       commentBody = module.exports.createBodyForComparisonWithTrendDetMovAve(evaluationResult, completeConfig);
@@ -30661,6 +30654,79 @@ module.exports.createBodyForComparisonWithTrendDetDeltas = function(evaluationRe
 
   return lines.join('\n')
 }
+
+/**
+ * Builds the Markdown comment body for the `jump_detection` evaluation method.
+ *
+ * @param {object} evaluationResult - Read here: `referenceBenchmarks.current`,
+ *   `referenceBenchmarks.previous`, `results.result` and `evalParameters`.
+ * @param {object} completeConfig - Forwarded to `addInfoAboutBenchRes` and
+ *   `alertUsersIfBenchFailed`.
+ * @returns {string} The comment lines joined with '\n'.
+ */
+module.exports.createBodyForComparisonWithJumpDeltas = function(evaluationResult, completeConfig) {
+  core.debug('------ start createBodyForComparisonWithJumpDeltas ------')
+  const currentBenchmark = evaluationResult.referenceBenchmarks.current;
+  const previousBenchmark = evaluationResult.referenceBenchmarks.previous;
+
+  const lines = []
+  lines.push('## Benchmark results')
+  lines.push('')
+  lines.push(`<b>Benchmark group:</b> ${currentBenchmark.benchmarkGroupName}`)
+  lines.push('')
+  lines.push(`The chosen evaluation method is jump_detection.`)
+  lines.push(`For each metric, there is the following condition: 
+        The current value should not change more than X% (Max. Jump in the table below) from the value measured for the previous benchmark.`)
+
+  if (currentBenchmark.benchmarkGroupName !== previousBenchmark.benchmarkGroupName) {
+    lines.push("<b>Note</b>: Benchmarks from different groups are being compared.")
+  }
+  lines.push(module.exports.createBenchDataTextForCompWithPrev(currentBenchmark, previousBenchmark))
+
+  lines.push('', '', '', '', '')
+  lines.push('## Results')
+  lines.push('', '', '', '', '')
+
+  lines.push(
+      `| Metric | Curr: ${currentBenchmark.commitInfo.id} | Prev: ${previousBenchmark.commitInfo.id} | Max. Jump | Was  | Res | `
+  )
+  // The delimiter row needs exactly as many cells as the header row (six);
+  // otherwise GitHub does not recognise the block as a table.
+  lines.push('|-|-|-|-|-|-|')
+
+  const evaluationResults = evaluationResult.results.result
+  const evaluationParameters = evaluationResult.evalParameters
+  for (let i = 0; i < evaluationResults.length; i++) {
+    const resultStatus = evaluationResults[i];
+    const metricName = evaluationParameters.metricNames[i];
+    const metricUnit = evaluationParameters.metricUnits[i];
+    // NOTE(review): the "Curr" cell and the "Was" cell are both derived from
+    // evalParameters.is[i] - confirm which one should hold the measured value.
+    const actualValue = parseFloat(evaluationParameters.is[i]).toFixed(2);
+    const shouldBe = evaluationParameters.shouldBe[i];
+    const ratio = evaluationParameters.is[i];
+
+    const previousBenchRes = parseFloat(evaluationParameters.than[i]).toFixed(2);
+    const prevBenchValAndUnit = previousBenchRes + ' ' + metricUnit;
+    let line
+    let valueAndUnit = actualValue + ' ' + metricUnit
+
+    if (resultStatus === 'failed' || resultStatus === 'passed') {
+      let betterOrWorse = resultStatus === 'passed' ? '🟢' : '🔴'
+      line = `| \`${metricName}\` | \`${valueAndUnit}\` | \`${prevBenchValAndUnit}\` | ${shouldBe} | ${ratio} | ${betterOrWorse} |`
+    } else {
+      line = `| \`${metricName}\` | \`${valueAndUnit}\` | N/A | N/A | N/A | 🔘 |`
+    }
+    lines.push(line)
+  }
+
+  const benchmarkPassed = module.exports.addInfoAboutBenchRes(lines, completeConfig, evaluationResults);
+  module.exports.alertUsersIfBenchFailed(benchmarkPassed, completeConfig, lines);
+  return lines.join('\n')
+}
+
+
+
 module.exports.createBenchDataText = function (currentBenchmark) {
   core.info('------ start createBenchDataText ------')
   const benchInfo = currentBenchmark.benchmarkInfo
@@ -30897,7 +30963,7 @@ module.exports.createBodyForComparisonWithThresholdRange = function (
 ///////////////////////
 /////////////////////// Summary
 ///////////////////////
-module.exports.createWorkflowSummaryForCompWithPrev = function (evaluationResult, completeConfig) {
+module.exports.createWorkflowSummaryForCompWithPrev = function (evaluationResult, completeConfig, successful) {
 
   const currentBenchmark = evaluationResult.referenceBenchmarks.current;
   const previousBenchmark = evaluationResult.referenceBenchmarks.previous;
@@ -32665,7 +32731,9 @@ async function run() {
     if (addJobSummary === 'on' || (addJobSummary === 'if_failed' && shouldFail)) {
 
       if (evaluationConfig.evaluationMethod === 'previous') {
-        createWorkflowSummaryForCompWithPrev(evaluationResult, completeConfig);
+        createWorkflowSummaryForCompWithPrev(evaluationResult, completeConfig, false);
+      } else if (evaluationConfig.evaluationMethod === 'previous_successful') {
+        createWorkflowSummaryForCompWithPrev(evaluationResult, completeConfig, true);
       } else if (evaluationConfig.evaluationMethod === 'threshold') {
         createWorkflowSummaryThreshold(evaluationResult, completeConfig);
       } else if (evaluationConfig.evaluationMethod === 'threshold_range') {
diff --git a/resultsFolder/result_wg-medium2.json b/resultsFolder/result_wg-medium2.json
deleted file mode 100644
index a46aa452..00000000
--- a/resultsFolder/result_wg-medium2.json
+++ /dev/null
@@ -1,39 +0,0 @@
-{
-  "benchInfo": {
-    "executionTime": "0m 11s",
-    "otherInfo": "YCSB Parameters: workload=workloada, recordcount=200000, threads=32, ",
-    "parametrization": {
-      "storageEngine": "N/A",
-      "logVerbosity": "N/A",
-      "bindIp": "0.0.0.0",
-      "port": 27017
-    }
-  },
-  "results": [
-    {
-      "unit": "ops/sec",
-      "value": 16884.761502743775,
-      "name": "OVERALL Throughput"
-    },
-    {
-      "unit": "us",
-      "value": 1765.6541831135073,
-      "name": "[READ], AverageLatency(us)"
-    },
-    {
-      "unit": "us",
-      "value": 65407.0,
-      "name": "[READ], MaxLatency(us)"
-    },
-    {
-      "unit": "us",
-      "value": 1866.0130910511919,
-      "name": "[UPDATE], AverageLatency(us)"
-    },
-    {
-      "unit": "us",
-      "value": 156159.0,
-      "name": "[UPDATE], MaxLatency(us)"
-    }
-  ]
-}
\ No newline at end of file
diff --git a/resultsFolder/result_wg-medium3.json b/resultsFolder/result_wg-medium3.json
deleted file mode 100644
index 9b2a400e..00000000
--- a/resultsFolder/result_wg-medium3.json
+++ /dev/null
@@ -1,39 +0,0 @@
-{
-  "benchInfo": {
-    "executionTime": "0m 11s",
-    "otherInfo": "YCSB Parameters: workload=workloada, recordcount=200000, threads=32, ",
-    "parametrization": {
-      "storageEngine": "N/A",
-      "logVerbosity": "N/A",
-      "bindIp": "0.0.0.0",
-      "port": 27017
-    }
-  },
-  "results": [
-    {
-      "unit": "ops/sec",
-      "value": 16770.08217340265,
-      "name": "OVERALL Throughput"
-    },
-    {
-      "unit": "us",
-      "value": 1817.7090485888857,
-      "name": "[READ], AverageLatency(us)"
-    },
-    {
-      "unit": "us",
-      "value": 158847.0,
-      "name": "[READ], MaxLatency(us)"
-    },
-    {
-      "unit": "us",
-      "value": 1861.29883843717,
-      "name": "[UPDATE], AverageLatency(us)"
-    },
-    {
-      "unit": "us",
-      "value": 155135.0,
-      "name": "[UPDATE], MaxLatency(us)"
-    }
-  ]
-}
\ No newline at end of file
diff --git a/resultsFolder/result_wg-medium4.json b/resultsFolder/result_wg-medium4.json
deleted file mode 100644
index 7ca039ff..00000000
--- a/resultsFolder/result_wg-medium4.json
+++ /dev/null
@@ -1,39 +0,0 @@
-{
-  "benchInfo": {
-    "executionTime": "0m 11s",
-    "otherInfo": "YCSB Parameters: workload=workloada, recordcount=200000, threads=32, ",
-    "parametrization": {
-      "storageEngine": "N/A",
-      "logVerbosity": "N/A",
-      "bindIp": "0.0.0.0",
-      "port": 27017
-    }
-  },
-  "results": [
-    {
-      "unit": "ops/sec",
-      "value": 16810.960746406658,
-      "name": "OVERALL Throughput"
-    },
-    {
-      "unit": "us",
-      "value": 1834.0088426016898,
-      "name": "[READ], AverageLatency(us)"
-    },
-    {
-      "unit": "us",
-      "value": 159487.0,
-      "name": "[READ], MaxLatency(us)"
-    },
-    {
-      "unit": "us",
-      "value": 1814.1897455816663,
-      "name": "[UPDATE], AverageLatency(us)"
-    },
-    {
-      "unit": "us",
-      "value": 51807.0,
-      "name": "[UPDATE], MaxLatency(us)"
-    }
-  ]
-}
\ No newline at end of file
diff --git a/src/bench.js b/src/bench.js
index aaf7dc04..4a8ae447 100644
--- a/src/bench.js
+++ b/src/bench.js
@@ -30,7 +30,6 @@ module.exports.createCurrBench = function (config) {
   }).filter(result => result !== null);
 
   let commit;
-  // if config.eventName === schedule, then we will not have
   if (config.eventName === 'schedule') {
     core.info('The workflow was triggered by a scheduled event.');
     commit = getCommitReplacementWhenTriggeredByScheduledEvent(config.runId);
diff --git a/src/bench_data.js b/src/bench_data.js
index 00eb9e48..6ebe439b 100644
--- a/src/bench_data.js
+++ b/src/bench_data.js
@@ -26,7 +26,6 @@ module.exports.addCompleteBenchmarkToFile = async (
     core.debug(`Reading file at ${pathToPreviousDataFile}`)
     try {
       const data = await fs.readFile(pathToPreviousDataFile, 'utf8')
-      //core.debug('Read file: ' + data) // -> can be very long...
       jsonData = JSON.parse(data)
     } catch (err) {
       core.debug(
@@ -229,13 +228,9 @@ module.exports.getBenchFromWeekAgo = function (
   );
 
   let benchmarks = data.entries[benchmarkGroupToCompare];
-  // Print the amount of benchmarks
-
   let closestBenchmark = null;
   let smallestDifference = Infinity;
 
-
-
   benchmarks.forEach(benchmark => {
     let difference = Math.abs(now - benchmark.date - ONE_WEEK_IN_MS);
     if (difference < smallestDifference) {
@@ -298,7 +293,6 @@ module.exports.getBenchmarkOfStableBranch = function (benchmarkGroupToCompare, f
         folderWithBenchData, fileNameWithBenchData
     );
   let benchmarks = data.entries[benchmarkGroupToCompare];
-  // find benchmark with commit sha == latestBenchSha
   let benchmark = benchmarks.find(benchmark => benchmark.commit.id === latestBenchSha);
   core.debug(`Benchmark of stable branch: ${JSON.stringify(benchmark)}`);
 
diff --git a/src/comment.js b/src/comment.js
index cdee7b75..9051ae0a 100644
--- a/src/comment.js
+++ b/src/comment.js
@@ -19,13 +19,13 @@ module.exports.createComment = function (
       commentBody = module.exports.createBodyForComparisonWithPrev(evaluationResult, completeConfig);
       break;
     case 'previous_successful':
-      commentBody = module.exports.createBodyForComparisonWithPrevSucc(evaluationResult, completeConfig);
+      commentBody = module.exports.createBodyForComparisonWithPrev(evaluationResult, completeConfig);
       break;
     case 'threshold_range':
       commentBody = module.exports.createBodyForComparisonWithThresholdRange(evaluationResult, completeConfig);
       break;
     case 'jump_detection':
-      commentBody = module.exports.createBodyForComparisonWithJumpDet(evaluationResult, completeConfig);
+      commentBody = module.exports.createBodyForComparisonWithJumpDeltas(evaluationResult, completeConfig);
       break;
     case 'trend_detection_moving_ave':
       commentBody = module.exports.createBodyForComparisonWithTrendDetMovAve(evaluationResult, completeConfig);
@@ -242,6 +242,79 @@ module.exports.createBodyForComparisonWithTrendDetDeltas = function(evaluationRe
 
   return lines.join('\n')
 }
+
+/**
+ * Builds the Markdown comment body for the `jump_detection` evaluation method.
+ *
+ * @param {object} evaluationResult - Read here: `referenceBenchmarks.current`,
+ *   `referenceBenchmarks.previous`, `results.result` and `evalParameters`.
+ * @param {object} completeConfig - Forwarded to `addInfoAboutBenchRes` and
+ *   `alertUsersIfBenchFailed`.
+ * @returns {string} The comment lines joined with '\n'.
+ */
+module.exports.createBodyForComparisonWithJumpDeltas = function(evaluationResult, completeConfig) {
+  core.debug('------ start createBodyForComparisonWithJumpDeltas ------')
+  const currentBenchmark = evaluationResult.referenceBenchmarks.current;
+  const previousBenchmark = evaluationResult.referenceBenchmarks.previous;
+
+  const lines = []
+  lines.push('## Benchmark results')
+  lines.push('')
+  lines.push(`<b>Benchmark group:</b> ${currentBenchmark.benchmarkGroupName}`)
+  lines.push('')
+  lines.push(`The chosen evaluation method is jump_detection.`)
+  lines.push(`For each metric, there is the following condition: 
+        The current value should not change more than X% (Max. Jump in the table below) from the value measured for the previous benchmark.`)
+
+  if (currentBenchmark.benchmarkGroupName !== previousBenchmark.benchmarkGroupName) {
+    lines.push("<b>Note</b>: Benchmarks from different groups are being compared.")
+  }
+  lines.push(module.exports.createBenchDataTextForCompWithPrev(currentBenchmark, previousBenchmark))
+
+  lines.push('', '', '', '', '')
+  lines.push('## Results')
+  lines.push('', '', '', '', '')
+
+  lines.push(
+      `| Metric | Curr: ${currentBenchmark.commitInfo.id} | Prev: ${previousBenchmark.commitInfo.id} | Max. Jump | Was  | Res | `
+  )
+  // The delimiter row needs exactly as many cells as the header row (six);
+  // otherwise GitHub does not recognise the block as a table.
+  lines.push('|-|-|-|-|-|-|')
+
+  const evaluationResults = evaluationResult.results.result
+  const evaluationParameters = evaluationResult.evalParameters
+  for (let i = 0; i < evaluationResults.length; i++) {
+    const resultStatus = evaluationResults[i];
+    const metricName = evaluationParameters.metricNames[i];
+    const metricUnit = evaluationParameters.metricUnits[i];
+    // NOTE(review): the "Curr" cell and the "Was" cell are both derived from
+    // evalParameters.is[i] - confirm which one should hold the measured value.
+    const actualValue = parseFloat(evaluationParameters.is[i]).toFixed(2);
+    const shouldBe = evaluationParameters.shouldBe[i];
+    const ratio = evaluationParameters.is[i];
+
+    const previousBenchRes = parseFloat(evaluationParameters.than[i]).toFixed(2);
+    const prevBenchValAndUnit = previousBenchRes + ' ' + metricUnit;
+    let line
+    let valueAndUnit = actualValue + ' ' + metricUnit
+
+    if (resultStatus === 'failed' || resultStatus === 'passed') {
+      let betterOrWorse = resultStatus === 'passed' ? '🟢' : '🔴'
+      line = `| \`${metricName}\` | \`${valueAndUnit}\` | \`${prevBenchValAndUnit}\` | ${shouldBe} | ${ratio} | ${betterOrWorse} |`
+    } else {
+      line = `| \`${metricName}\` | \`${valueAndUnit}\` | N/A | N/A | N/A | 🔘 |`
+    }
+    lines.push(line)
+  }
+
+  const benchmarkPassed = module.exports.addInfoAboutBenchRes(lines, completeConfig, evaluationResults);
+  module.exports.alertUsersIfBenchFailed(benchmarkPassed, completeConfig, lines);
+  return lines.join('\n')
+}
+
+
+
 module.exports.createBenchDataText = function (currentBenchmark) {
   core.info('------ start createBenchDataText ------')
   const benchInfo = currentBenchmark.benchmarkInfo
@@ -478,7 +551,7 @@ module.exports.createBodyForComparisonWithThresholdRange = function (
 ///////////////////////
 /////////////////////// Summary
 ///////////////////////
-module.exports.createWorkflowSummaryForCompWithPrev = function (evaluationResult, completeConfig) {
+module.exports.createWorkflowSummaryForCompWithPrev = function (evaluationResult, completeConfig, successful) {
 
   const currentBenchmark = evaluationResult.referenceBenchmarks.current;
   const previousBenchmark = evaluationResult.referenceBenchmarks.previous;
diff --git a/src/main.js b/src/main.js
index f6764b99..db6fc261 100644
--- a/src/main.js
+++ b/src/main.js
@@ -109,7 +109,9 @@ async function run() {
     if (addJobSummary === 'on' || (addJobSummary === 'if_failed' && shouldFail)) {
 
       if (evaluationConfig.evaluationMethod === 'previous') {
-        createWorkflowSummaryForCompWithPrev(evaluationResult, completeConfig);
+        createWorkflowSummaryForCompWithPrev(evaluationResult, completeConfig, false);
+      } else if (evaluationConfig.evaluationMethod === 'previous_successful') {
+        createWorkflowSummaryForCompWithPrev(evaluationResult, completeConfig, true);
       } else if (evaluationConfig.evaluationMethod === 'threshold') {
         createWorkflowSummaryThreshold(evaluationResult, completeConfig);
       } else if (evaluationConfig.evaluationMethod === 'threshold_range') {