From f5e9b4cbca249296391e1c4fdf1272138c63c6ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dawid=20Niezg=C3=B3dka?= Date: Mon, 22 Jan 2024 17:35:26 +0100 Subject: [PATCH] add jump det comment --- .github/workflows/test.yml | 36 ++++++----- dist/index.js | 90 ++++++++++++++++++++++++---- resultsFolder/result_wg-medium2.json | 39 ------------ resultsFolder/result_wg-medium3.json | 39 ------------ resultsFolder/result_wg-medium4.json | 39 ------------ src/bench.js | 1 - src/bench_data.js | 6 -- src/comment.js | 79 +++++++++++++++++++++++- src/main.js | 4 +- 9 files changed, 175 insertions(+), 158 deletions(-) delete mode 100644 resultsFolder/result_wg-medium2.json delete mode 100644 resultsFolder/result_wg-medium3.json delete mode 100644 resultsFolder/result_wg-medium4.json diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9994df0a..5f4a76a9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -5,6 +5,7 @@ on: push: branches: - main + - missing_eval jobs: test: @@ -22,7 +23,7 @@ jobs: sparse-checkout-cone-mode: false path: 'bench_data' - - run: tree +# - run: tree - name: Evaluate benchmark uses: ./ @@ -33,26 +34,23 @@ jobs: file_with_bench_data: data.json folder_with_current_benchmark_results: resultsFolder bench_group_name: "MongoDB Benchmark" - evaluation_method: "threshold" - threshold_values: 2500,700,50000,5000,30000 - comparison_operators: tolerance,tolerance,tolerance,tolerance,tolerance - comparison_margins: 10,10,10,10,10 - result_files_merge_strategy_for_each_metric: "sum, average, average, average, average" + evaluation_method: "jump_detection" + jump_detection_thresholds: "10, 10, 10, 10, 10" save_curr_bench_res: true github_token: ${{ secrets.GH_TOKEN }} github_context: ${{ toJson(github) }} - - name: Commit files - working-directory: ./bench_data - run: | - git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" - git config --local user.name "github-actions[bot]" - git commit -a -m "Add changes" - git push origin bench_data - - - name: Check output and fail if needed - if: steps.eval.outputs.should_fail == 'true' - run: | - echo "Failing as indicated by evaluation action" - exit 1 +# - name: Commit files +# working-directory: ./bench_data +# run: | +# git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" +# git config --local user.name "github-actions[bot]" +# git commit -a -m "Add changes" +# git push origin bench_data +# +# - name: Check output and fail if needed +# if: steps.eval.outputs.should_fail == 'true' +# run: | +# echo "Failing as indicated by evaluation action" +# exit 1 diff --git a/dist/index.js b/dist/index.js index 1aa6fa69..840dc85f 100644 --- a/dist/index.js +++ b/dist/index.js @@ -30073,7 +30073,6 @@ module.exports.createCurrBench = function (config) { }).filter(result => result !== null); let commit; - // if config.eventName === schedule, then we will not have if (config.eventName === 'schedule') { core.info('The workflow was triggered by a scheduled event.'); commit = getCommitReplacementWhenTriggeredByScheduledEvent(config.runId); @@ -30125,7 +30124,6 @@ module.exports.addCompleteBenchmarkToFile = async ( core.debug(`Reading file at ${pathToPreviousDataFile}`) try { const data = await fs.readFile(pathToPreviousDataFile, 'utf8') - //core.debug('Read file: ' + data) // -> can be very long... jsonData = JSON.parse(data) } catch (err) { core.debug( @@ -30328,13 +30326,9 @@ module.exports.getBenchFromWeekAgo = function ( ); let benchmarks = data.entries[benchmarkGroupToCompare]; - // Print the amount of benchmarks - let closestBenchmark = null; let smallestDifference = Infinity; - - benchmarks.forEach(benchmark => { let difference = Math.abs(now - benchmark.date - ONE_WEEK_IN_MS); if (difference < smallestDifference) { @@ -30397,7 +30391,6 @@ module.exports.getBenchmarkOfStableBranch = function (benchmarkGroupToCompare, f folderWithBenchData, fileNameWithBenchData ); let benchmarks = data.entries[benchmarkGroupToCompare]; - // find benchmark with commit sha == latestBenchSha let benchmark = benchmarks.find(benchmark => benchmark.commit.id === latestBenchSha); core.debug(`Benchmark of stable branch: ${JSON.stringify(benchmark)}`); @@ -30438,13 +30431,13 @@ module.exports.createComment = function ( commentBody = module.exports.createBodyForComparisonWithPrev(evaluationResult, completeConfig); break; case 'previous_successful': - commentBody = module.exports.createBodyForComparisonWithPrevSucc(evaluationResult, completeConfig); + commentBody = module.exports.createBodyForComparisonWithPrev(evaluationResult, completeConfig); break; case 'threshold_range': commentBody = module.exports.createBodyForComparisonWithThresholdRange(evaluationResult, completeConfig); break; case 'jump_detection': - commentBody = module.exports.createBodyForComparisonWithJumpDet(evaluationResult, completeConfig); + commentBody = module.exports.createBodyForComparisonWithJumpDeltas(evaluationResult, completeConfig); break; case 'trend_detection_moving_ave': commentBody = module.exports.createBodyForComparisonWithTrendDetMovAve(evaluationResult, completeConfig); @@ -30661,6 +30654,79 @@ module.exports.createBodyForComparisonWithTrendDetDeltas = function(evaluationRe return lines.join('\n') } + +module.exports.createBodyForComparisonWithJumpDeltas = function(evaluationResult, completeConfig) { + core.debug('------ start createBodyForComparisonWithJumpDeltas ------') + const currentBenchmark = evaluationResult.referenceBenchmarks.current; + const previousBenchmark = evaluationResult.referenceBenchmarks.previous; + + const lines = [] + lines.push('## Benchmark results') + lines.push('') + lines.push(`Benchmark group: ${currentBenchmark.benchmarkGroupName}`) + lines.push('') + lines.push(`The chosen evaluation method is jump_detection.`) + lines.push(`For each metric, there is the following condition: + The current value should not change more than X% (Max. ch in the table below) from the value measured for the previous benchmark.`) + + const currentBenchmarkGroupName = currentBenchmark.benchmarkGroupName + const previousBenchmarkGroupName = previousBenchmark.benchmarkGroupName + + if (currentBenchmarkGroupName !== previousBenchmarkGroupName) { + lines.push( + "Note: Benchmarks from different groups are being compared." + ) + } + const benchDataText = module.exports.createBenchDataTextForCompWithPrev( + currentBenchmark, + previousBenchmark + ) + lines.push(benchDataText) + + lines.push('', '', '', '', '') + lines.push('## Results') + lines.push('', '', '', '', '') + + lines.push( + `| Metric | Curr: ${currentBenchmark.commitInfo.id} | Prev: ${previousBenchmark.commitInfo.id} | Max. Jump | Was | Res | ` + ) + lines.push('|-|-|-|-|-|-|-|') + + const evaluationResults = evaluationResult.results.result + const evaluationParameters = evaluationResult.evalParameters + const evaluationConfiguration = completeConfig.evaluationConfig + for (let i = 0; i < evaluationResults.length; i++) { + + const resultStatus = evaluationResults[i]; + const metricName = evaluationParameters.metricNames[i]; + const metricUnit = evaluationParameters.metricUnits[i]; + const actualValue = parseFloat(evaluationParameters.is[i]).toFixed(2); + + const shouldBe = evaluationParameters.shouldBe[i]; + const ratio = evaluationParameters.is[i]; + + const previousBenchRes = parseFloat(evaluationParameters.than[i]).toFixed(2); + const prevBenchValAndUnit = previousBenchRes + ' ' + metricUnit; + let line + let valueAndUnit = actualValue + ' ' + metricUnit + + if (resultStatus === 'failed' || resultStatus === 'passed') { + let betterOrWorse = resultStatus === 'passed' ? '🟢' : '🔴' + line = `| \`${metricName}\` | \`${valueAndUnit}\` | \`${prevBenchValAndUnit}\` | ${shouldBe} | ${ratio} | ${betterOrWorse} |` + } else { + line = `| \`${metricName}\` | \'${valueAndUnit}\' | N/A | N/A | N/A | 🔘 |` + } + + lines.push(line) + } + + const benchmarkPassed = module.exports.addInfoAboutBenchRes(lines, completeConfig, evaluationResults); + module.exports.alertUsersIfBenchFailed(benchmarkPassed, completeConfig, lines); + return lines.join('\n') +} + + + module.exports.createBenchDataText = function (currentBenchmark) { core.info('------ start createBenchDataText ------') const benchInfo = currentBenchmark.benchmarkInfo @@ -30897,7 +30963,7 @@ module.exports.createBodyForComparisonWithThresholdRange = function ( /////////////////////// /////////////////////// Summary /////////////////////// -module.exports.createWorkflowSummaryForCompWithPrev = function (evaluationResult, completeConfig) { +module.exports.createWorkflowSummaryForCompWithPrev = function (evaluationResult, completeConfig, successful) { const currentBenchmark = evaluationResult.referenceBenchmarks.current; const previousBenchmark = evaluationResult.referenceBenchmarks.previous; @@ -32665,7 +32731,9 @@ async function run() { if (addJobSummary === 'on' || (addJobSummary === 'if_failed' && shouldFail)) { if (evaluationConfig.evaluationMethod === 'previous') { - createWorkflowSummaryForCompWithPrev(evaluationResult, completeConfig); + createWorkflowSummaryForCompWithPrev(evaluationResult, completeConfig, false); + } else if (evaluationConfig.evaluationMethod === 'previous_successful') { + createWorkflowSummaryForCompWithPrev(evaluationResult, completeConfig, true); } else if (evaluationConfig.evaluationMethod === 'threshold') { createWorkflowSummaryThreshold(evaluationResult, completeConfig); } else if (evaluationConfig.evaluationMethod === 'threshold_range') { diff --git a/resultsFolder/result_wg-medium2.json b/resultsFolder/result_wg-medium2.json deleted file mode 100644 index a46aa452..00000000 --- a/resultsFolder/result_wg-medium2.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "benchInfo": { - "executionTime": "0m 11s", - "otherInfo": "YCSB Parameters: workload=workloada, recordcount=200000, threads=32, ", - "parametrization": { - "storageEngine": "N/A", - "logVerbosity": "N/A", - "bindIp": "0.0.0.0", - "port": 27017 - } - }, - "results": [ - { - "unit": "ops/sec", - "value": 16884.761502743775, - "name": "OVERALL Throughput" - }, - { - "unit": "us", - "value": 1765.6541831135073, - "name": "[READ], AverageLatency(us)" - }, - { - "unit": "us", - "value": 65407.0, - "name": "[READ], MaxLatency(us)" - }, - { - "unit": "us", - "value": 1866.0130910511919, - "name": "[UPDATE], AverageLatency(us)" - }, - { - "unit": "us", - "value": 156159.0, - "name": "[UPDATE], MaxLatency(us)" - } - ] -} \ No newline at end of file diff --git a/resultsFolder/result_wg-medium3.json b/resultsFolder/result_wg-medium3.json deleted file mode 100644 index 9b2a400e..00000000 --- a/resultsFolder/result_wg-medium3.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "benchInfo": { - "executionTime": "0m 11s", - "otherInfo": "YCSB Parameters: workload=workloada, recordcount=200000, threads=32, ", - "parametrization": { - "storageEngine": "N/A", - "logVerbosity": "N/A", - "bindIp": "0.0.0.0", - "port": 27017 - } - }, - "results": [ - { - "unit": "ops/sec", - "value": 16770.08217340265, - "name": "OVERALL Throughput" - }, - { - "unit": "us", - "value": 1817.7090485888857, - "name": "[READ], AverageLatency(us)" - }, - { - "unit": "us", - "value": 158847.0, - "name": "[READ], MaxLatency(us)" - }, - { - "unit": "us", - "value": 1861.29883843717, - "name": "[UPDATE], AverageLatency(us)" - }, - { - "unit": "us", - "value": 155135.0, - "name": "[UPDATE], MaxLatency(us)" - } - ] -} \ No newline at end of file diff --git a/resultsFolder/result_wg-medium4.json b/resultsFolder/result_wg-medium4.json deleted file mode 100644 index 7ca039ff..00000000 --- a/resultsFolder/result_wg-medium4.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "benchInfo": { - "executionTime": "0m 11s", - "otherInfo": "YCSB Parameters: workload=workloada, recordcount=200000, threads=32, ", - "parametrization": { - "storageEngine": "N/A", - "logVerbosity": "N/A", - "bindIp": "0.0.0.0", - "port": 27017 - } - }, - "results": [ - { - "unit": "ops/sec", - "value": 16810.960746406658, - "name": "OVERALL Throughput" - }, - { - "unit": "us", - "value": 1834.0088426016898, - "name": "[READ], AverageLatency(us)" - }, - { - "unit": "us", - "value": 159487.0, - "name": "[READ], MaxLatency(us)" - }, - { - "unit": "us", - "value": 1814.1897455816663, - "name": "[UPDATE], AverageLatency(us)" - }, - { - "unit": "us", - "value": 51807.0, - "name": "[UPDATE], MaxLatency(us)" - } - ] -} \ No newline at end of file diff --git a/src/bench.js b/src/bench.js index aaf7dc04..4a8ae447 100644 --- a/src/bench.js +++ b/src/bench.js @@ -30,7 +30,6 @@ module.exports.createCurrBench = function (config) { }).filter(result => result !== null); let commit; - // if config.eventName === schedule, then we will not have if (config.eventName === 'schedule') { core.info('The workflow was triggered by a scheduled event.'); commit = getCommitReplacementWhenTriggeredByScheduledEvent(config.runId); diff --git a/src/bench_data.js b/src/bench_data.js index 00eb9e48..6ebe439b 100644 --- a/src/bench_data.js +++ b/src/bench_data.js @@ -26,7 +26,6 @@ module.exports.addCompleteBenchmarkToFile = async ( core.debug(`Reading file at ${pathToPreviousDataFile}`) try { const data = await fs.readFile(pathToPreviousDataFile, 'utf8') - //core.debug('Read file: ' + data) // -> can be very long... jsonData = JSON.parse(data) } catch (err) { core.debug( @@ -229,13 +228,9 @@ module.exports.getBenchFromWeekAgo = function ( ); let benchmarks = data.entries[benchmarkGroupToCompare]; - // Print the amount of benchmarks - let closestBenchmark = null; let smallestDifference = Infinity; - - benchmarks.forEach(benchmark => { let difference = Math.abs(now - benchmark.date - ONE_WEEK_IN_MS); if (difference < smallestDifference) { @@ -298,7 +293,6 @@ module.exports.getBenchmarkOfStableBranch = function (benchmarkGroupToCompare, f folderWithBenchData, fileNameWithBenchData ); let benchmarks = data.entries[benchmarkGroupToCompare]; - // find benchmark with commit sha == latestBenchSha let benchmark = benchmarks.find(benchmark => benchmark.commit.id === latestBenchSha); core.debug(`Benchmark of stable branch: ${JSON.stringify(benchmark)}`); diff --git a/src/comment.js b/src/comment.js index cdee7b75..9051ae0a 100644 --- a/src/comment.js +++ b/src/comment.js @@ -19,13 +19,13 @@ module.exports.createComment = function ( commentBody = module.exports.createBodyForComparisonWithPrev(evaluationResult, completeConfig); break; case 'previous_successful': - commentBody = module.exports.createBodyForComparisonWithPrevSucc(evaluationResult, completeConfig); + commentBody = module.exports.createBodyForComparisonWithPrev(evaluationResult, completeConfig); break; case 'threshold_range': commentBody = module.exports.createBodyForComparisonWithThresholdRange(evaluationResult, completeConfig); break; case 'jump_detection': - commentBody = module.exports.createBodyForComparisonWithJumpDet(evaluationResult, completeConfig); + commentBody = module.exports.createBodyForComparisonWithJumpDeltas(evaluationResult, completeConfig); break; case 'trend_detection_moving_ave': commentBody = module.exports.createBodyForComparisonWithTrendDetMovAve(evaluationResult, completeConfig); @@ -242,6 +242,79 @@ module.exports.createBodyForComparisonWithTrendDetDeltas = function(evaluationRe return lines.join('\n') } + +module.exports.createBodyForComparisonWithJumpDeltas = function(evaluationResult, completeConfig) { + core.debug('------ start createBodyForComparisonWithJumpDeltas ------') + const currentBenchmark = evaluationResult.referenceBenchmarks.current; + const previousBenchmark = evaluationResult.referenceBenchmarks.previous; + + const lines = [] + lines.push('## Benchmark results') + lines.push('') + lines.push(`Benchmark group: ${currentBenchmark.benchmarkGroupName}`) + lines.push('') + lines.push(`The chosen evaluation method is jump_detection.`) + lines.push(`For each metric, there is the following condition: + The current value should not change more than X% (Max. ch in the table below) from the value measured for the previous benchmark.`) + + const currentBenchmarkGroupName = currentBenchmark.benchmarkGroupName + const previousBenchmarkGroupName = previousBenchmark.benchmarkGroupName + + if (currentBenchmarkGroupName !== previousBenchmarkGroupName) { + lines.push( + "Note: Benchmarks from different groups are being compared." + ) + } + const benchDataText = module.exports.createBenchDataTextForCompWithPrev( + currentBenchmark, + previousBenchmark + ) + lines.push(benchDataText) + + lines.push('', '', '', '', '') + lines.push('## Results') + lines.push('', '', '', '', '') + + lines.push( + `| Metric | Curr: ${currentBenchmark.commitInfo.id} | Prev: ${previousBenchmark.commitInfo.id} | Max. Jump | Was | Res | ` + ) + lines.push('|-|-|-|-|-|-|-|') + + const evaluationResults = evaluationResult.results.result + const evaluationParameters = evaluationResult.evalParameters + const evaluationConfiguration = completeConfig.evaluationConfig + for (let i = 0; i < evaluationResults.length; i++) { + + const resultStatus = evaluationResults[i]; + const metricName = evaluationParameters.metricNames[i]; + const metricUnit = evaluationParameters.metricUnits[i]; + const actualValue = parseFloat(evaluationParameters.is[i]).toFixed(2); + + const shouldBe = evaluationParameters.shouldBe[i]; + const ratio = evaluationParameters.is[i]; + + const previousBenchRes = parseFloat(evaluationParameters.than[i]).toFixed(2); + const prevBenchValAndUnit = previousBenchRes + ' ' + metricUnit; + let line + let valueAndUnit = actualValue + ' ' + metricUnit + + if (resultStatus === 'failed' || resultStatus === 'passed') { + let betterOrWorse = resultStatus === 'passed' ? '🟢' : '🔴' + line = `| \`${metricName}\` | \`${valueAndUnit}\` | \`${prevBenchValAndUnit}\` | ${shouldBe} | ${ratio} | ${betterOrWorse} |` + } else { + line = `| \`${metricName}\` | \'${valueAndUnit}\' | N/A | N/A | N/A | 🔘 |` + } + + lines.push(line) + } + + const benchmarkPassed = module.exports.addInfoAboutBenchRes(lines, completeConfig, evaluationResults); + module.exports.alertUsersIfBenchFailed(benchmarkPassed, completeConfig, lines); + return lines.join('\n') +} + + + module.exports.createBenchDataText = function (currentBenchmark) { core.info('------ start createBenchDataText ------') const benchInfo = currentBenchmark.benchmarkInfo @@ -478,7 +551,7 @@ module.exports.createBodyForComparisonWithThresholdRange = function ( /////////////////////// /////////////////////// Summary /////////////////////// -module.exports.createWorkflowSummaryForCompWithPrev = function (evaluationResult, completeConfig) { +module.exports.createWorkflowSummaryForCompWithPrev = function (evaluationResult, completeConfig, successful) { const currentBenchmark = evaluationResult.referenceBenchmarks.current; const previousBenchmark = evaluationResult.referenceBenchmarks.previous; diff --git a/src/main.js b/src/main.js index f6764b99..db6fc261 100644 --- a/src/main.js +++ b/src/main.js @@ -109,7 +109,9 @@ async function run() { if (addJobSummary === 'on' || (addJobSummary === 'if_failed' && shouldFail)) { if (evaluationConfig.evaluationMethod === 'previous') { - createWorkflowSummaryForCompWithPrev(evaluationResult, completeConfig); + createWorkflowSummaryForCompWithPrev(evaluationResult, completeConfig, false); + } else if (evaluationConfig.evaluationMethod === 'previous_successful') { + createWorkflowSummaryForCompWithPrev(evaluationResult, completeConfig, true); } else if (evaluationConfig.evaluationMethod === 'threshold') { createWorkflowSummaryThreshold(evaluationResult, completeConfig); } else if (evaluationConfig.evaluationMethod === 'threshold_range') {