.github/workflows/run-experiment.yml

name: Run TestPilot experiment

on:
  workflow_dispatch:
    inputs:
      packages:
        description: "Packages to generate tests for"
        default: "+benchmarks.txt"
      snippetsFrom:
        description: "Code snippets source"
        default: "doc"
      numSnippets:
        description: 'Maximum number of snippets to include in each prompt, or "all"'
        default: "all"
      snippetLength:
        description: "Maximum length of each snippet in lines"
        default: "20"
      temperatures:
        description: "Sampling temperatures to try when obtaining completions (whitespace-separated)"
        default: "0.0"
      model:
        description: "Which LLM API to use"
        type: "string"
        default: "llama-3-70b-instruct"
      compareTo:
        description: "Run number of previous run to compare to (leave empty to skip comparison)"
        default: ""
      skipSlowBenchmarks:
        description: "Skip slow benchmarks"
        type: boolean
        default: false
      debug_enabled:
        type: boolean
        description: "Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)"
        default: false
  # Run every weekday at 2:00 AM UTC
  # schedule:
  #   - cron: '0 2 * * 1-5'

jobs:
  setup:
    runs-on: ubuntu-latest
    outputs:
      packages: "${{ steps.parse_packages.outputs.packages }}"
      snippetsFrom: "${{ github.event.inputs.snippetsFrom || 'doc' }}"
      snippetLength: "${{ github.event.inputs.snippetLength || '20' }}"
      temperatures: "${{ github.event.inputs.temperatures || '0.0' }}"
      numSnippets: "${{ github.event.inputs.numSnippets || 'all' }}"
      model: "${{ github.event.inputs.model || 'llama-3-70b-instruct' }}"
      template: './templates/template.hb'
      retryTemplate: './templates/retry-template.hb'
    steps:
      - uses: actions/checkout@v3

      - uses: actions/setup-node@v3
        with:
          node-version: 12

      - id: parse_packages
        run: |
          packages=$(node ${GITHUB_WORKSPACE}/.github/parse_packages.js \
            ${{ github.event.inputs.skipSlowBenchmarks == 'true' && '--skip-slow-benchmarks' || '' }} \
            "${{ github.event.inputs.packages || '+benchmarks.txt' }}")
          echo "packages=$packages" >> $GITHUB_OUTPUT

  benchmark:
    needs:
      - setup
    runs-on: ubuntu-latest
    continue-on-error: true
    strategy:
      fail-fast: false
      matrix:
        package: ${{ fromJson(needs.setup.outputs.packages) }}
    steps:
      - uses: actions/checkout@v3
        with:
          path: testpilot

      - name: Install CodeQL 2.17.6
        run: |
          wget -q https://github.com/github/codeql-action/releases/download/codeql-bundle-v2.17.6/codeql-bundle-linux64.tar.gz
          tar xzf codeql-bundle-linux64.tar.gz
          echo "$GITHUB_WORKSPACE/codeql" >> $GITHUB_PATH
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Node.js
        uses: actions/setup-node@v3
        with:
          node-version: 12

      - name: Set up TestPilot
        run: |
          cd testpilot
          npm run build
          cd ql
          codeql pack install

      - name: Checkout github package repo
        if: ${{ matrix.package.host == 'github.com' }}
        uses: actions/checkout@v3
        with:
          repository: ${{ format('{0}/{1}', matrix.package.owner, matrix.package.repo) }}
          ref: ${{ matrix.package.sha }}
          path: "source"

      - name: Checkout gitlab package repo
        if: ${{ matrix.package.host == 'gitlab.com' }}
        run: |
          git clone ${{ format('https://gitlab.com/{0}/{1}', matrix.package.owner, matrix.package.repo) }} source
          cd source
          git checkout ${{ matrix.package.sha }}

      - name: Determine package name
        id: pkg-name
        run: |
          # name of the package
          TESTPILOT_PACKAGE_NAME=$(cat source/${{ matrix.package.path }}/package.json | jq -r .name )

          # some packages have a / in their names (looking at you, gitlab-js!)
          if [[ "$TESTPILOT_PACKAGE_NAME" == *"/"* ]]; then
            TESTPILOT_PACKAGE_NAME=${TESTPILOT_PACKAGE_NAME##*/}
          fi

          # path to the package within the repo checkout
          TESTPILOT_PACKAGE_PATH="$GITHUB_WORKSPACE/$TESTPILOT_PACKAGE_NAME/${{ matrix.package.path }}"
          # make sure there isn't already a directory with the same name
          if [ -d "$TESTPILOT_PACKAGE_PATH" ]; then
            echo "ERROR: $TESTPILOT_PACKAGE_PATH already exists"
            exit 1
          fi
          # rename checkout, since some packages examine its name (looking at you, bluebird!)
          mv source $TESTPILOT_PACKAGE_NAME
          echo "Package name: $TESTPILOT_PACKAGE_NAME, path: $TESTPILOT_PACKAGE_PATH"
          # export environment variables
          echo "TESTPILOT_PACKAGE_NAME=$TESTPILOT_PACKAGE_NAME" >> $GITHUB_ENV
          echo "TESTPILOT_PACKAGE_PATH=$TESTPILOT_PACKAGE_PATH" >> $GITHUB_ENV
          echo "pkgName=$TESTPILOT_PACKAGE_NAME" >> $GITHUB_OUTPUT

      - name: Install package, its dependencies, and test packages
        run: |
          cd $TESTPILOT_PACKAGE_PATH
          npm i || npm i --legacy-peer-deps
          # if matrix.package.dependencies is not empty, install them
          if ! [ -z "${{ matrix.package.dependencies }}" ]; then
            npm i ${{ matrix.package.dependencies }}
          fi
          npm run build || npm run prepack || echo 'Error with npm run build and npm run prepack'
          npm i --no-save mocha

      - name: Create CodeQL database
        if: ${{ needs.setup.outputs.snippetsFrom == 'code' || needs.setup.outputs.snippetsFrom == 'both' }}
        run: |
          codeql database create --language=javascript "--source-root=$TESTPILOT_PACKAGE_PATH" -- ./db

      # - name: Setup tmate session
      #   uses: mxschmitt/action-tmate@v3
      #   if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}    

      - name: Generate tests
        env:
          TESTPILOT_LLM_API_ENDPOINT: '${{ secrets.TESTPILOT_LLM_API_ENDPOINT }}'
          TESTPILOT_LLM_AUTH_HEADERS: '${{ secrets.TESTPILOT_LLM_AUTH_HEADERS }}'
        run: |
          cd testpilot
          outputdir="results/$TESTPILOT_PACKAGE_NAME"
          mkdir -p $outputdir 
          echo "Computing package statistics"
          node benchmark/package_stats.js "$TESTPILOT_PACKAGE_PATH" > stats.json
          echo "Generating tests for $TESTPILOT_PACKAGE_NAME"
          export command="node benchmark/run.js \
            --outputDir $outputdir \
            --package "$TESTPILOT_PACKAGE_PATH" \
            --temperatures "${{ needs.setup.outputs.temperatures }}" \
            --model ${{ needs.setup.outputs.model }} \
            --template ${{ needs.setup.outputs.template }} \
            --retryTemplate ${{ needs.setup.outputs.retryTemplate }}" 
          echo "command: $command"
          $command
          mv stats.json $outputdir

      - name: Calculate edit distance of generated tests
        run: |
          cd testpilot
          outputdir="results/$TESTPILOT_PACKAGE_NAME"
          node benchmark/editDistance.js --generatedTestsDir  $outputdir --existingTestsDir $TESTPILOT_PACKAGE_PATH --pkgName $TESTPILOT_PACKAGE_NAME
          mv similarityReport.json $outputdir

      - name: Add non-trivial coverage data
        run: |
          cd testpilot
          ./.github/non_trivial_coverage.sh "results/$TESTPILOT_PACKAGE_NAME"

      - name: Zip up results
        run: |
          cd testpilot
          zip -r results.zip results

      - name: Upload artifacts
        uses: actions/upload-artifact@v3
        with:
          name: results-${{ steps.pkg-name.outputs.pkgName }}
          path: "testpilot/results.zip"

      # - name: Setup tmate session
      #   uses: mxschmitt/action-tmate@v3
      #   if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}

  combine_output:
    name: Combine output from all benchmarks
    needs:
      - setup
      - benchmark
    runs-on: ubuntu-latest
    steps:
      - name: Download output zips
        uses: actions/download-artifact@v2

      # - name: Setup tmate session
      #   uses: mxschmitt/action-tmate@v3
      #   if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}

      - name: Combine output zips
        run: |
          mkdir results
          for zip in results-*/results.zip
          do
            unzip -oq $zip
          done
          zip -r results.zip results
      - name: Upload combined output files
        uses: actions/upload-artifact@v2
        with:
          name: results-all
          path: results.zip

  generate-report:
    needs:
      - setup
      - benchmark
      - combine_output
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Set up Node.js
        uses: actions/setup-node@v3
        with:
          node-version: 12

      - name: Set up TestPilot
        run: |
          npm run build

      - name: Download artifacts for this run
        uses: actions/download-artifact@v3
        with:
          name: results-all
          path: results

      - name: Download artifacts for comparison run
        if: ${{ github.event.inputs.compareTo != '' }}
        uses: dawidd6/action-download-artifact@v2
        with:
          run_number: ${{ github.event.inputs.compareTo }}
          name: results-all
          path: baseline

      - name: print toJson(needs.setup.outputs for debugging
        run: |
          echo '${{ toJson(needs.setup.outputs) }}'

      - name: Setup tmate session
        uses: mxschmitt/action-tmate@v3
        if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}

      - name: Generate report
        run: |
          cd results
          unzip results.zip
          cd ..

          echo '${{ toJson(needs.setup.outputs) }}' > config.json
          if [ -d baseline ]; then
            cd baseline
            unzip results.zip
            cd .. 
            baseline_artifact=baseline/results
          else
            baseline_artifact=''
          fi
          node ${GITHUB_WORKSPACE}/benchmark/generate_report.js ${{ needs.setup.outputs.model }} config.json results/results $baseline_artifact > $GITHUB_STEP_SUMMARY