benchmark-merge #732
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Merge performance benchmark workflow.
#
# Triggered by a `repository_dispatch` event of type `benchmark-merge`.
# Client payload fields read by this workflow:
#   version          - git ref to check out and benchmark
#   commit_to_branch - branch of the results database to commit to
#   email_recipient  - when non-empty, results are emailed to this address
#   issue_id         - when non-empty, results are posted as a comment there
#   actor            - user mentioned in the comment
name: Merge Benchmarks

on:
  repository_dispatch:
    types: [ benchmark-merge ]

env:
  SCRIPT_DIR: '.github/scripts/merge-perf'
  RESULT_TABLE_NAME: 'merge_perf_results'
  DOLTHUB_DB: 'import-perf/merge-perf'

jobs:
  bench:
    name: Benchmark
    defaults:
      run:
        shell: bash
    strategy:
      fail-fast: true
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          ref: ${{ github.event.client_payload.version }}

      - name: Set up Go 1.x
        id: go
        uses: actions/setup-go@v5
        with:
          go-version-file: go/go.mod

      - name: Setup Python 3.x
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      # NOTE(review): this step only assigns a shell-local variable; nothing
      # in this workflow reads `steps.version` outputs. Kept for parity.
      - name: Dolt version
        id: version
        env:
          # Passed through the environment rather than interpolated into the
          # script so payload content is never parsed as shell code.
          VERSION: ${{ github.event.client_payload.version }}
        run: |
          version="$VERSION"

      - name: Install dolt
        working-directory: ./go
        run: go install ./cmd/dolt

      - name: Config dolt
        id: config
        run: |
          dolt config --global --add user.email "[email protected]"
          dolt config --global --add user.name "merge-perf"

      # Runs four merge scenarios and writes one INSERT per scenario into a
      # SQL file whose path is exposed as the `result_path` step output.
      - name: Run bench
        id: bench
        run: |
          gw=$GITHUB_WORKSPACE
          DATADIR=$gw/data

          # initialize results sql import
          RESULTS=$gw/results.sql
          echo "CREATE TABLE ${{ env.RESULT_TABLE_NAME }} (name varchar(50) primary key, table_cnt int, run_cnt int, add_cnt int, delete_cnt int, update_cnt int, conflict_cnt int, fks bool, latency float);" >> "$RESULTS"

          # parameters for testing
          ROW_NUM=1000000
          TABLE_NUM=2
          EDIT_CNT=60000
          # parallel arrays: index i describes scenario i
          names=('adds_only' 'deletes_only' 'updates_only' 'adds_updates_deletes')
          adds=($EDIT_CNT 0 0 $EDIT_CNT)
          deletes=(0 $EDIT_CNT 0 $EDIT_CNT)
          updates=(0 0 $EDIT_CNT $EDIT_CNT)

          wd=$(pwd)
          for i in {0..3}; do
            # each iteration ends inside $TMPDIR, so return to the start dir
            cd "$wd"
            echo "${names[$i]}, ${adds[$i]}, ${deletes[$i]}, ${updates[$i]}"

            # data.py creates files for import
            python ${{ env.SCRIPT_DIR }}/data.py "$DATADIR" $TABLE_NUM $ROW_NUM ${adds[$i]} ${deletes[$i]} ${updates[$i]}

            # setup.sh runs the import and commit process for a set of data files
            TMPDIR=$gw/tmp
            ./${{ env.SCRIPT_DIR }}/setup.sh "$TMPDIR" "$DATADIR"

            # small python script times merge, we suppress errcodes but print error messages
            cd "$TMPDIR"
            python3 -c "import time, subprocess, sys; start = time.time(); res=subprocess.run(['dolt', 'merge', '--squash', 'main'], capture_output=True); err = res.stdout + res.stderr if res.returncode != 0 else ''; latency = time.time() -start; print(latency); sys.stderr.write(str(err))" 1> lat.log 2>err.log
            latency=$(cat lat.log)
            cat err.log

            # count conflicts in first table
            conflicts=$(dolt sql -r csv -q "select count(*) from dolt_conflicts_table0;" | tail -1)
            echo "INSERT INTO ${{ env.RESULT_TABLE_NAME }} values ('"${names[$i]}"', $TABLE_NUM, $ROW_NUM, ${adds[$i]}, ${deletes[$i]}, ${updates[$i]}, $conflicts, true, $latency);" >> "$RESULTS"
          done

          echo "result_path=$RESULTS" >> $GITHUB_OUTPUT

      # Loads the results into a clone of the results database, pushes them,
      # and exposes `report_path` (CSV file) and `avg` as step outputs.
      - name: Report
        id: report
        env:
          # Passed through the environment rather than interpolated into the
          # script so payload content is never parsed as shell code.
          BRANCH: ${{ github.event.client_payload.commit_to_branch }}
        run: |
          gw=$GITHUB_WORKSPACE
          in="${{ steps.bench.outputs.result_path }}"
          query="select name, add_cnt, delete_cnt, update_cnt, round(latency, 2) as latency from ${{ env.RESULT_TABLE_NAME }}"
          summaryq="select round(avg(latency), 2) as avg from ${{ env.RESULT_TABLE_NAME }}"

          out="$gw/results.csv"
          dolt_dir="$gw/merge-perf"

          dolt config --global --add user.email "[email protected]"
          dolt config --global --add user.name "merge-perf"

          echo '${{ secrets.DOLTHUB_IMPORT_PERF_CREDS_VALUE }}' | dolt creds import
          dolt clone ${{ env.DOLTHUB_DB }} "$dolt_dir"

          cd "$dolt_dir"

          branch="$BRANCH"
          # checkout branch; create it when the lookup prints nothing.
          # The substitution is quoted so multi-word query output is tested
          # as a single string instead of being word-split by `[`.
          if [ -z "$(dolt sql -q "select 1 from dolt_branches where name = '$branch';")" ]; then
            dolt checkout -b "$branch"
          else
            dolt checkout "$branch"
          fi

          dolt sql -q "drop table if exists ${{ env.RESULT_TABLE_NAME }}"

          # load results
          dolt sql < "$in"

          # push results to dolthub
          dolt add ${{ env.RESULT_TABLE_NAME }}
          dolt commit -m "CI commit"
          dolt push -f origin "$branch"

          # generate report
          dolt sql -r csv -q "$query" > "$out"

          cat "$out"
          echo "report_path=$out" >> $GITHUB_OUTPUT

          avg=$(dolt sql -r csv -q "$summaryq" | tail -1)
          echo "avg=$avg" >> $GITHUB_OUTPUT

      # Converts the CSV report into an HTML table file; its path is exposed
      # as the `html` step output.
      - name: Format Results
        id: html
        # The comparison must live inside the expression; text placed after
        # `}}` turns the whole condition into an always-truthy string.
        if: ${{ github.event.client_payload.email_recipient != '' }}
        run: |
          gw="$GITHUB_WORKSPACE"
          in="${{ steps.report.outputs.report_path }}"
          out="$gw/results.html"

          echo "<table>" > "$out"
          # first CSV line is the header row
          print_header=true
          while IFS= read -r line; do
            if "$print_header"; then
              echo "  <tr><th>${line//,/</th><th>}</th></tr>" >> "$out"
              print_header=false
              continue
            fi
            echo "  <tr><td>${line//,/</td><td>}</td></tr>" >> "$out"
          done < "$in"
          echo "</table>" >> "$out"

          avg="${{ steps.report.outputs.avg }}"
          echo "<table><tr><th>Average</th></tr><tr><td>$avg</td></tr></table>" >> "$out"

          cat "$out"
          echo "html=$out" >> $GITHUB_OUTPUT

      - name: Configure AWS Credentials
        if: ${{ github.event.client_payload.email_recipient != '' }}
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: us-west-2

      - name: Send Email
        uses: ./.github/actions/ses-email-action
        if: ${{ github.event.client_payload.email_recipient != '' }}
        with:
          region: us-west-2
          toAddresses: '["${{ github.event.client_payload.email_recipient }}"]'
          subject: 'Merge Performance Benchmarks: ${{ github.event.client_payload.version }}'
          bodyPath: ${{ steps.html.outputs.html }}
          template: 'SysbenchTemplate'

      - name: Read CSV
        if: ${{ github.event.client_payload.issue_id != '' }}
        id: csv
        uses: juliangruber/read-file-action@v1
        with:
          path: "${{ steps.report.outputs.report_path }}"

      - name: Create MD
        if: ${{ github.event.client_payload.issue_id != '' }}
        uses: dolthub/csv-to-md-table-action@v4
        id: md
        with:
          csvinput: ${{ steps.csv.outputs.content }}

      - uses: mshick/add-pr-comment@v2
        if: ${{ github.event.client_payload.issue_id != '' }}
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          issue: ${{ github.event.client_payload.issue_id }}
          message-failure: merge benchmark failed
          message-cancelled: merge benchmark cancelled
          allow-repeats: true
          message: |
            @${{ github.event.client_payload.actor }} __DOLT__
            ${{ steps.md.outputs.markdown-table }}