Run TestPilot experiment #35
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Manually dispatched workflow. Every input below is optional and carries a
# default, so the workflow can be started without filling in any field.
name: Run TestPilot experiment

on:
  workflow_dispatch:
    inputs:
      packages:
        description: 'Packages to generate tests for'
        default: '+benchmarks.txt'
      snippetsFrom:
        description: 'Code snippets source'
        default: 'doc'
      numSnippets:
        description: 'Maximum number of snippets to include in each prompt, or "all"'
        default: 'all'
      snippetLength:
        description: 'Maximum length of each snippet in lines'
        # Quoted on purpose: the value is handled as a string, not a number.
        default: '20'
      temperatures:
        description: 'Sampling temperatures to try when obtaining completions (whitespace-separated)'
        # Quoted on purpose: a bare 0.0 would be parsed as a float.
        default: '0.0'
      model:
        description: 'Which LLM API to use'
        type: string
        default: 'llama-3-70b-instruct'
      compareTo:
        description: 'Run number of previous run to compare to (leave empty to skip comparison)'
        default: ''
      skipSlowBenchmarks:
        description: 'Skip slow benchmarks'
        type: boolean
        default: false
      debug_enabled:
        type: boolean
        description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)'
        default: false
  # Scheduled trigger, currently disabled.
  # Run every weekday at 2:00 AM UTC
  # schedule:
  #   - cron: '0 2 * * 1-5'
jobs:
  # ---------------------------------------------------------------------------
  # setup: resolves the dispatch inputs (falling back to the same defaults the
  # inputs declare) and asks .github/parse_packages.js for the package list
  # that becomes the benchmark matrix.
  # ---------------------------------------------------------------------------
  setup:
    runs-on: ubuntu-latest
    outputs:
      packages: "${{ steps.parse_packages.outputs.packages }}"
      snippetsFrom: "${{ github.event.inputs.snippetsFrom || 'doc' }}"
      snippetLength: "${{ github.event.inputs.snippetLength || '20' }}"
      temperatures: "${{ github.event.inputs.temperatures || '0.0' }}"
      numSnippets: "${{ github.event.inputs.numSnippets || 'all' }}"
      model: "${{ github.event.inputs.model || 'llama-3-70b-instruct' }}"
      template: './templates/template.hb'
      retryTemplate: './templates/retry-template.hb'
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3
        with:
          node-version: 12
      - id: parse_packages
        # User-supplied inputs are handed to the script through environment
        # variables instead of being spliced into the shell text, so a value
        # containing quotes, `$(...)` or backticks cannot alter the script.
        env:
          PACKAGES: "${{ github.event.inputs.packages || '+benchmarks.txt' }}"
          SKIP_SLOW_FLAG: "${{ github.event.inputs.skipSlowBenchmarks == 'true' && '--skip-slow-benchmarks' || '' }}"
        run: |
          # ${VAR:+...} drops the flag entirely when it is empty rather than
          # passing an empty-string argument.
          packages=$(node "${GITHUB_WORKSPACE}/.github/parse_packages.js" \
            ${SKIP_SLOW_FLAG:+"$SKIP_SLOW_FLAG"} \
            "$PACKAGES")
          echo "packages=$packages" >> "$GITHUB_OUTPUT"

  # ---------------------------------------------------------------------------
  # benchmark: one matrix entry per package reported by the setup job. Each
  # entry checks out the package, generates tests for it and uploads a
  # results-<package name> artifact. A failing entry does not fail the run
  # (continue-on-error) nor cancel its siblings (fail-fast: false).
  # ---------------------------------------------------------------------------
  benchmark:
    needs:
      - setup
    runs-on: ubuntu-latest
    continue-on-error: true
    strategy:
      fail-fast: false
      matrix:
        package: ${{ fromJson(needs.setup.outputs.packages) }}
    steps:
      - uses: actions/checkout@v3
        with:
          path: testpilot
      - name: Install CodeQL 2.17.6
        run: |
          wget -q https://github.com/github/codeql-action/releases/download/codeql-bundle-v2.17.6/codeql-bundle-linux64.tar.gz
          tar xzf codeql-bundle-linux64.tar.gz
          echo "$GITHUB_WORKSPACE/codeql" >> "$GITHUB_PATH"
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Set up Node.js
        uses: actions/setup-node@v3
        with:
          node-version: 12
      - name: Set up TestPilot
        # NOTE(review): no `npm ci` / `npm install` precedes the build here;
        # confirm the build script does not rely on installed dependencies.
        run: |
          cd testpilot
          npm run build
          cd ql
          codeql pack install
      - name: Checkout github package repo
        if: ${{ matrix.package.host == 'github.com' }}
        uses: actions/checkout@v3
        with:
          repository: ${{ format('{0}/{1}', matrix.package.owner, matrix.package.repo) }}
          ref: ${{ matrix.package.sha }}
          path: 'source'
      - name: Checkout gitlab package repo
        if: ${{ matrix.package.host == 'gitlab.com' }}
        run: |
          git clone "${{ format('https://gitlab.com/{0}/{1}', matrix.package.owner, matrix.package.repo) }}" source
          cd source
          git checkout "${{ matrix.package.sha }}"
      - name: Determine package name
        id: pkg-name
        run: |
          # name of the package
          TESTPILOT_PACKAGE_NAME=$(jq -r .name "source/${{ matrix.package.path }}/package.json")
          # some packages have a / in their names (looking at you, gitlab-js!);
          # keep only the part after the last slash (no-op when there is none)
          TESTPILOT_PACKAGE_NAME=${TESTPILOT_PACKAGE_NAME##*/}
          # path to the package within the repo checkout
          TESTPILOT_PACKAGE_PATH="$GITHUB_WORKSPACE/$TESTPILOT_PACKAGE_NAME/${{ matrix.package.path }}"
          # make sure there isn't already a directory with the same name
          if [ -d "$TESTPILOT_PACKAGE_PATH" ]; then
            echo "ERROR: $TESTPILOT_PACKAGE_PATH already exists"
            exit 1
          fi
          # rename checkout, since some packages examine its name (looking at you, bluebird!)
          mv source "$TESTPILOT_PACKAGE_NAME"
          echo "Package name: $TESTPILOT_PACKAGE_NAME, path: $TESTPILOT_PACKAGE_PATH"
          # export environment variables for the following steps, and the
          # package name as a step output for the artifact name
          echo "TESTPILOT_PACKAGE_NAME=$TESTPILOT_PACKAGE_NAME" >> "$GITHUB_ENV"
          echo "TESTPILOT_PACKAGE_PATH=$TESTPILOT_PACKAGE_PATH" >> "$GITHUB_ENV"
          echo "pkgName=$TESTPILOT_PACKAGE_NAME" >> "$GITHUB_OUTPUT"
      - name: Install package, its dependencies, and test packages
        run: |
          cd "$TESTPILOT_PACKAGE_PATH"
          npm i || npm i --legacy-peer-deps
          # if matrix.package.dependencies is not empty, install them
          if [ -n "${{ matrix.package.dependencies }}" ]; then
            # Left unquoted as in the original so that a value naming several
            # packages is split into separate arguments (assumed format;
            # confirm against parse_packages.js).
            npm i ${{ matrix.package.dependencies }}
          fi
          # best effort: a package without a build/prepack script must not fail the job
          npm run build || npm run prepack || echo 'Error with npm run build and npm run prepack'
          npm i --no-save mocha
      - name: Create CodeQL database
        if: ${{ needs.setup.outputs.snippetsFrom == 'code' || needs.setup.outputs.snippetsFrom == 'both' }}
        # NOTE(review): the "Generate tests" command below does not reference
        # ./db; confirm whether benchmark/run.js still needs this database.
        run: |
          codeql database create --language=javascript "--source-root=$TESTPILOT_PACKAGE_PATH" -- ./db
      # - name: Setup tmate session
      #   uses: mxschmitt/action-tmate@v3
      #   if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}
      - name: Generate tests
        env:
          TESTPILOT_LLM_API_ENDPOINT: '${{ secrets.TESTPILOT_LLM_API_ENDPOINT }}'
          TESTPILOT_LLM_AUTH_HEADERS: '${{ secrets.TESTPILOT_LLM_AUTH_HEADERS }}'
          # Values derived from dispatch inputs travel via the environment so
          # they are never interpreted as shell syntax.
          TEMPERATURES: "${{ needs.setup.outputs.temperatures }}"
          MODEL: "${{ needs.setup.outputs.model }}"
          TEMPLATE: "${{ needs.setup.outputs.template }}"
          RETRY_TEMPLATE: "${{ needs.setup.outputs.retryTemplate }}"
        # NOTE(review): the snippetsFrom / numSnippets / snippetLength outputs
        # of the setup job are not passed to benchmark/run.js here; confirm
        # that this is intended.
        run: |
          cd testpilot
          outputdir="results/$TESTPILOT_PACKAGE_NAME"
          mkdir -p "$outputdir"
          echo "Computing package statistics"
          node benchmark/package_stats.js "$TESTPILOT_PACKAGE_PATH" > stats.json
          echo "Generating tests for $TESTPILOT_PACKAGE_NAME"
          # Built as an array: expanding a single command string unquoted
          # would re-split every value on whitespace, so a whitespace-separated
          # temperature list would stop being one argument.
          command=(
            node benchmark/run.js
            --outputDir "$outputdir"
            --package "$TESTPILOT_PACKAGE_PATH"
            --temperatures "$TEMPERATURES"
            --model "$MODEL"
            --template "$TEMPLATE"
            --retryTemplate "$RETRY_TEMPLATE"
          )
          echo "command: ${command[*]}"
          "${command[@]}"
          mv stats.json "$outputdir"
      - name: Calculate edit distance of generated tests
        run: |
          cd testpilot
          outputdir="results/$TESTPILOT_PACKAGE_NAME"
          node benchmark/editDistance.js --generatedTestsDir "$outputdir" --existingTestsDir "$TESTPILOT_PACKAGE_PATH" --pkgName "$TESTPILOT_PACKAGE_NAME"
          mv similarityReport.json "$outputdir"
      - name: Add non-trivial coverage data
        run: |
          cd testpilot
          ./.github/non_trivial_coverage.sh "results/$TESTPILOT_PACKAGE_NAME"
      - name: Zip up results
        run: |
          cd testpilot
          zip -r results.zip results
      - name: Upload artifacts
        # All artifact actions in this workflow are on v4: v4 artifacts cannot
        # be mixed with v3 ones in the same run, and v3 and older are retired
        # on github.com.
        uses: actions/upload-artifact@v4
        with:
          name: results-${{ steps.pkg-name.outputs.pkgName }}
          path: 'testpilot/results.zip'
      # - name: Setup tmate session
      #   uses: mxschmitt/action-tmate@v3
      #   if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}

  # ---------------------------------------------------------------------------
  # combine_output: downloads every results-* artifact of this run, unpacks
  # them into one results/ tree and re-uploads it as the results-all artifact.
  # ---------------------------------------------------------------------------
  combine_output:
    name: Combine output from all benchmarks
    needs:
      - setup
      - benchmark
    runs-on: ubuntu-latest
    steps:
      - name: Download output zips
        # No `name` given: every artifact of the run is downloaded into a
        # directory named after the artifact (results-<package name>/).
        uses: actions/download-artifact@v4
      # - name: Setup tmate session
      #   uses: mxschmitt/action-tmate@v3
      #   if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}
      - name: Combine output zips
        run: |
          mkdir results
          # each archive contains a top-level results/ directory, so unpacking
          # them all in place merges the per-package results
          for zip in results-*/results.zip
          do
            unzip -oq "$zip"
          done
          zip -r results.zip results
      - name: Upload combined output files
        uses: actions/upload-artifact@v4
        with:
          name: results-all
          path: results.zip

  # ---------------------------------------------------------------------------
  # generate-report: renders the job summary from the combined results,
  # optionally comparing against the results-all artifact of an earlier run.
  # ---------------------------------------------------------------------------
  generate-report:
    needs:
      - setup
      - benchmark
      - combine_output
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Node.js
        uses: actions/setup-node@v3
        with:
          node-version: 12
      - name: Set up TestPilot
        run: |
          npm run build
      - name: Download artifacts for this run
        uses: actions/download-artifact@v4
        with:
          name: results-all
          path: results
      - name: Download artifacts for comparison run
        if: ${{ github.event.inputs.compareTo != '' }}
        uses: dawidd6/action-download-artifact@v2
        with:
          run_number: ${{ github.event.inputs.compareTo }}
          name: results-all
          path: baseline
      - name: print toJson(needs.setup.outputs) for debugging
        # The JSON is passed via the environment: it embeds user inputs, and a
        # single quote inside any of them would otherwise end the shell string.
        env:
          SETUP_OUTPUTS: "${{ toJson(needs.setup.outputs) }}"
        run: |
          printf '%s\n' "$SETUP_OUTPUTS"
      - name: Setup tmate session
        uses: mxschmitt/action-tmate@v3
        if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}
      - name: Generate report
        env:
          SETUP_OUTPUTS: "${{ toJson(needs.setup.outputs) }}"
          MODEL: "${{ needs.setup.outputs.model }}"
        run: |
          cd results
          unzip results.zip
          cd ..
          printf '%s\n' "$SETUP_OUTPUTS" > config.json
          # the baseline directory only exists when the comparison download ran
          if [ -d baseline ]; then
            cd baseline
            unzip results.zip
            cd ..
            baseline_artifact=baseline/results
          else
            baseline_artifact=''
          fi
          # node ${GITHUB_WORKSPACE}/benchmark/generate_report.js config.json results/results $baseline_artifact > $GITHUB_STEP_SUMMARY
          # add model as first parameter; the baseline argument is omitted
          # entirely (not passed as an empty string) when there is no baseline
          node "${GITHUB_WORKSPACE}/benchmark/generate_report.js" "$MODEL" config.json results/results ${baseline_artifact:+"$baseline_artifact"} > "$GITHUB_STEP_SUMMARY"