From c78dbd2752cf8aa84f366c1c532070c1f106f436 Mon Sep 17 00:00:00 2001
From: Gustavo Valverde
Date: Mon, 25 Sep 2023 21:50:25 +0100
Subject: [PATCH] fix: remove extra workflow

---
 .github/workflows/chore-deploy-gcp-tests.yml | 966 -------------------
 1 file changed, 966 deletions(-)
 delete mode 100644 .github/workflows/chore-deploy-gcp-tests.yml

diff --git a/.github/workflows/chore-deploy-gcp-tests.yml b/.github/workflows/chore-deploy-gcp-tests.yml
deleted file mode 100644
index 78acc9ae27a..00000000000
--- a/.github/workflows/chore-deploy-gcp-tests.yml
+++ /dev/null
@@ -1,966 +0,0 @@
-name: Deploy GCP tests
-
-on:
-  workflow_call:
-    inputs:
-      # Status and logging
-      test_id:
-        required: true
-        type: string
-        description: 'Unique identifier for the test'
-      test_description:
-        required: true
-        type: string
-        description: 'Explains what the test does'
-      height_grep_text:
-        required: false
-        type: string
-        description: 'Regular expression to find the tip height in test logs, and add it to newly created cached state image metadata'
-
-      # Test selection and parameters
-      test_variables:
-        required: true
-        type: string
-        description: 'Environmental variables used to select and configure the test'
-      network:
-        required: false
-        type: string
-        default: Mainnet
-        description: 'Zcash network to test against'
-      is_long_test:
-        required: false
-        type: boolean
-        default: false
-        description: 'Does this test need multiple run jobs? (Does it run longer than 6 hours?)'
-
-      # Cached state
-      #
-      # TODO: find a better name
-      root_state_path:
-        required: false
-        type: string
-        default: '/zebrad-cache'
-        description: 'Cached state base directory path'
-      # TODO: find a better name
-      zebra_state_dir:
-        required: false
-        type: string
-        default: ''
-        description: 'Zebra cached state directory and input image prefix to search in GCP'
-      # TODO: find a better name
-      lwd_state_dir:
-        required: false
-        type: string
-        default: ''
-        description: 'Lightwalletd cached state directory and input image prefix to search in GCP'
-      disk_prefix:
-        required: false
-        type: string
-        default: 'zebrad-cache'
-        description: 'Image name prefix, and `zebra_state_dir` name for newly created cached states'
-      disk_suffix:
-        required: false
-        type: string
-        description: 'Image name suffix'
-      needs_zebra_state:
-        required: true
-        type: boolean
-        description: 'Does the test use Zebra cached state?'
-      needs_lwd_state:
-        required: false
-        type: boolean
-        description: 'Does the test use Lightwalletd and Zebra cached state?'
-      # main branch states can be outdated and slower, but they can also be more reliable
-      prefer_main_cached_state:
-        required: false
-        type: boolean
-        default: false
-        description: 'Does the test prefer to use a main branch cached state?'
-      saves_to_disk:
-        required: true
-        type: boolean
-        description: 'Can this test create new or updated cached state disks?'
-      force_save_to_disk:
-        required: false
-        type: boolean
-        default: false
-        description: 'Force this test to create a new or updated cached state disk'
-      app_name:
-        required: false
-        type: string
-        default: 'zebra'
-        description: 'Application name, used to work out when a job is an update job'
-
-env:
-  # How many previous log lines we show at the start of each new log job.
-  # Increase this number if some log lines are skipped between jobs
-  #
-  # We want to show all the logs since the last job finished,
-  # but we don't know how long it will be between jobs.
-  # 200 lines is about 6-15 minutes of sync logs, or one panic log.
-  EXTRA_LOG_LINES: 200
-  # How many blocks to wait before creating an updated cached state image.
-  # 1 day is approximately 1152 blocks.
-  CACHED_STATE_UPDATE_LIMIT: 576
-
-jobs:
-  # set up and launch the test, if it doesn't use any cached state
-  # each test runs one of the *-with/without-cached-state job series, and skips the other
-  launch-without-cached-state:
-    name: Launch ${{ inputs.test_id }} test
-    if: ${{ !inputs.needs_zebra_state }}
-    runs-on: zfnd-runners
-    permissions:
-      contents: 'read'
-      id-token: 'write'
-    steps:
-      - uses: actions/checkout@v4.0.0
-        with:
-          persist-credentials: false
-          fetch-depth: '2'
-      - uses: r7kamura/rust-problem-matchers@v1.4.0
-
-      - name: Inject slug/short variables
-        uses: rlespinasse/github-slug-action@v4
-        with:
-          short-length: 7
-
-      # Makes the Zcash network name lowercase.
-      #
-      # Labels in GCP are required to be in lowercase, but the blockchain network
-      # uses sentence case, so we need to downcase ${{ inputs.network }}.
-      #
-      # Passes ${{ inputs.network }} to subsequent steps using $NETWORK env variable.
-      - name: Downcase network name for labels
-        run: |
-          NETWORK_CAPS="${{ inputs.network }}"
-          echo "NETWORK=${NETWORK_CAPS,,}" >> "$GITHUB_ENV"
-
-      # Install our SSH secret
-      - name: Install private SSH key
-        uses: shimataro/ssh-key-action@v2.5.1
-        with:
-          key: ${{ secrets.GCP_SSH_PRIVATE_KEY }}
-          name: google_compute_engine
-          known_hosts: unnecessary
-
-      - name: Generate public SSH key
-        run: |
-          sudo apt-get update && sudo apt-get -qq install -y --no-install-recommends openssh-client
-          ssh-keygen -y -f ~/.ssh/google_compute_engine > ~/.ssh/google_compute_engine.pub
-
-      # Setup gcloud CLI
-      - name: Authenticate to Google Cloud
-        id: auth
-        uses: google-github-actions/auth@v1.1.1
-        with:
-          retries: '3'
-          workload_identity_provider: '${{ vars.GCP_WIF }}'
-          service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
-
-      - name: Set up Cloud SDK
-        uses: google-github-actions/setup-gcloud@v1.1.1
-
-      # Create a Compute Engine virtual machine
-      - name: Create ${{ inputs.test_id }} GCP compute instance
-        id: create-instance
-        run: |
-          gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
-          --boot-disk-size 300GB \
-          --boot-disk-type pd-ssd \
-          --image-project=cos-cloud \
-          --image-family=cos-stable \
-          --create-disk=name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \
-          --container-image=gcr.io/google-containers/busybox \
-          --machine-type ${{ vars.GCP_LARGE_MACHINE }} \
-          --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
-          --scopes cloud-platform \
-          --metadata=google-monitoring-enabled=TRUE,google-logging-enabled=TRUE \
-          --metadata-from-file=startup-script=.github/workflows/scripts/gcp-vm-startup-script.sh \
-          --labels=app=${{ inputs.app_name }},environment=test,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }},test=${{ inputs.test_id }} \
-          --tags ${{ inputs.app_name }} \
-          --zone ${{ vars.GCP_ZONE }}
-          sleep 60
-
-      # Create a docker volume with the new disk we just created.
-      #
-      # SSH into the just-created VM, and create a docker volume with the newly created disk.
-      - name: Create ${{ inputs.test_id }} Docker volume
-        run: |
-          gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
-          --zone ${{ vars.GCP_ZONE }} \
-          --ssh-flag="-o ServerAliveInterval=5" \
-          --ssh-flag="-o ConnectionAttempts=20" \
-          --ssh-flag="-o ConnectTimeout=5" \
-          --command \
-          "\
-          sudo mkfs.ext4 -v /dev/sdb \
-          && \
-          sudo docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
-          ${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
-          "
-
-      # Launch the test without any cached state
-      - name: Launch ${{ inputs.test_id }} test
-        run: |
-          gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
-          --zone ${{ vars.GCP_ZONE }} \
-          --ssh-flag="-o ServerAliveInterval=5" \
-          --ssh-flag="-o ConnectionAttempts=20" \
-          --ssh-flag="-o ConnectTimeout=5" \
-          --command \
-          "\
-          sudo docker run \
-          --name ${{ inputs.test_id }} \
-          --tty \
-          --detach \
-          ${{ inputs.test_variables }} \
-          --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
-          ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
-          "
-
-  # set up and launch the test, if it uses cached state
-  # each test runs one of the *-with/without-cached-state job series, and skips the other
-  launch-with-cached-state:
-    name: Launch ${{ inputs.test_id }} test
-    if: ${{ inputs.needs_zebra_state }}
-    runs-on: zfnd-runners
-    outputs:
-      cached_disk_name: ${{ steps.get-disk-name.outputs.cached_disk_name }}
-    permissions:
-      contents: 'read'
-      id-token: 'write'
-    steps:
-      - uses: actions/checkout@v4.0.0
-        with:
-          persist-credentials: false
-          fetch-depth: '2'
-      - uses: r7kamura/rust-problem-matchers@v1.4.0
-
-      - name: Inject slug/short variables
-        uses: rlespinasse/github-slug-action@v4
-        with:
-          short-length: 7
-
-      - name: Downcase network name for disks and labels
-        run: |
-          NETWORK_CAPS="${{ inputs.network }}"
-          echo "NETWORK=${NETWORK_CAPS,,}" >> "$GITHUB_ENV"
-
-      # Install our SSH secret
-      - name: Install private SSH key
-        uses: shimataro/ssh-key-action@v2.5.1
-        with:
-          key: ${{ secrets.GCP_SSH_PRIVATE_KEY }}
-          name: google_compute_engine
-          known_hosts: unnecessary
-
-      - name: Generate public SSH key
-        run: |
-          sudo apt-get update && sudo apt-get -qq install -y --no-install-recommends openssh-client
-          ssh-keygen -y -f ~/.ssh/google_compute_engine > ~/.ssh/google_compute_engine.pub
-
-      # Setup gcloud CLI
-      - name: Authenticate to Google Cloud
-        id: auth
-        uses: google-github-actions/auth@v1.1.1
-        with:
-          retries: '3'
-          workload_identity_provider: '${{ vars.GCP_WIF }}'
-          service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
-
-      - name: Set up Cloud SDK
-        uses: google-github-actions/setup-gcloud@v1.1.1
-
-      # Find a cached state disk for this job, matching all of:
-      # - disk cached state (lwd_state_dir/zebra_state_dir or disk_prefix) - zebrad-cache or lwd-cache
-      # - state version (from the source code) - v{N}
-      # - network (network) - mainnet or testnet
-      # - disk target height kind (disk_suffix) - checkpoint or tip
-      #
-      # If the test needs a lightwalletd state (needs_lwd_state), set the variable DISK_PREFIX:
-      # - to ${{ inputs.lwd_state_dir }} if needed
-      # - to ${{ inputs.zebra_state_dir || inputs.disk_prefix }} if not
-      #
-      # If there are multiple disks:
-      # - prefer images generated from the same commit, then
-      # - if prefer_main_cached_state is true, prefer images from the `main` branch, then
-      # - use any images from any other branch or commit.
-      # Within each of these categories:
-      # - prefer newer images to older images
-      #
-      # Passes the disk name to subsequent steps using $CACHED_DISK_NAME env variable
-      # Passes the state version to subsequent steps using $STATE_VERSION env variable
-      #
-      # TODO: move this script into a file, and call it from manual-find-cached-disks.yml as well.
-      - name: Find ${{ inputs.test_id }} cached state disk
-        id: get-disk-name
-        run: |
-          LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "$GITHUB_WORKSPACE/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
-          echo "STATE_VERSION: $LOCAL_STATE_VERSION"
-
-          if [[ "${{ inputs.needs_lwd_state }}" == "true" ]]; then
-            DISK_PREFIX=${{ inputs.lwd_state_dir }}
-          else
-            DISK_PREFIX=${{ inputs.zebra_state_dir || inputs.disk_prefix }}
-          fi
-
-          # Try to find an image generated from a previous step or run of this commit.
-          # Fields are listed in the "Create image from state disk" step.
-          #
-          # We don't want to match the full branch name here, because:
-          # - we want to ignore the different GITHUB_REFs across manually triggered jobs,
-          #   pushed branches, and PRs,
-          # - previous commits might have been buggy,
-          #   or they might have worked and hide bugs in this commit
-          #   (we can't avoid this issue entirely, but we don't want to make it more likely), and
-          # - the branch name might have been shortened for the image.
-          #
-          # The probability of two matching short commit hashes within the same month is very low.
-          COMMIT_DISK_PREFIX="${DISK_PREFIX}-.+-${{ env.GITHUB_SHA_SHORT }}-v${LOCAL_STATE_VERSION}-${NETWORK}-${{ inputs.disk_suffix }}"
-          COMMIT_CACHED_DISK_NAME=$(gcloud compute images list --filter="status=READY AND name~${COMMIT_DISK_PREFIX}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
-          echo "${GITHUB_REF_SLUG_URL}-${{ env.GITHUB_SHA_SHORT }} Disk: $COMMIT_CACHED_DISK_NAME"
-          if [[ -n "$COMMIT_CACHED_DISK_NAME" ]]; then
-            echo "Description: $(gcloud compute images describe $COMMIT_CACHED_DISK_NAME --format='value(DESCRIPTION)')"
-          fi
-
-          # Try to find an image generated from the main branch
-          MAIN_CACHED_DISK_NAME=$(gcloud compute images list --filter="status=READY AND name~${DISK_PREFIX}-main-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${{ inputs.disk_suffix }}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
-          echo "main Disk: $MAIN_CACHED_DISK_NAME"
-          if [[ -n "$MAIN_CACHED_DISK_NAME" ]]; then
-            echo "Description: $(gcloud compute images describe $MAIN_CACHED_DISK_NAME --format='value(DESCRIPTION)')"
-          fi
-
-          # Try to find an image generated from any other branch
-          ANY_CACHED_DISK_NAME=$(gcloud compute images list --filter="status=READY AND name~${DISK_PREFIX}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${{ inputs.disk_suffix }}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
-          echo "any branch Disk: $ANY_CACHED_DISK_NAME"
-          if [[ -n "$ANY_CACHED_DISK_NAME" ]]; then
-            echo "Description: $(gcloud compute images describe $ANY_CACHED_DISK_NAME --format='value(DESCRIPTION)')"
-          fi
-
-          # Select a cached disk based on the job settings
-          CACHED_DISK_NAME="$COMMIT_CACHED_DISK_NAME"
-          if [[ -z "$CACHED_DISK_NAME" ]] && [[ "${{ inputs.prefer_main_cached_state }}" == "true" ]]; then
-            echo "Preferring main branch cached state to other branches..."
-            CACHED_DISK_NAME="$MAIN_CACHED_DISK_NAME"
-          fi
-          if [[ -z "$CACHED_DISK_NAME" ]]; then
-            CACHED_DISK_NAME="$ANY_CACHED_DISK_NAME"
-          fi
-
-          if [[ -z "$CACHED_DISK_NAME" ]]; then
-            echo "No cached state disk available"
-            echo "Expected ${COMMIT_DISK_PREFIX}"
-            echo "Also searched for cached disks from other branches"
-            echo "Cached state test jobs must depend on the cached state rebuild job"
-            exit 1
-          fi
-
-          echo "Selected Disk: $CACHED_DISK_NAME"
-          echo "cached_disk_name=$CACHED_DISK_NAME" >> "$GITHUB_OUTPUT"
-
-          echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> "$GITHUB_ENV"
-          echo "CACHED_DISK_NAME=$CACHED_DISK_NAME" >> "$GITHUB_ENV"
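The three search patterns in the step above narrow from same-commit images, to `main` branch images, to images from any branch, always taking the newest match. As a sketch with hypothetical values (short commit `a1b2c3d`, state version 25, Mainnet, tip disk), the names being matched look like this:

    # hypothetical image names, from most to least preferred:
    #   zebrad-cache-my-branch-a1b2c3d-v25-mainnet-tip      (same commit)
    #   zebrad-cache-main-9f8e7d6-v25-mainnet-tip           (main branch)
    #   zebrad-cache-other-branch-1e2d3c4-v25-mainnet-tip   (any branch)
    gcloud compute images list \
      --filter="status=READY AND name~zebrad-cache-.+-a1b2c3d-v25-mainnet-tip" \
      --format="value(NAME)" --sort-by=~creationTimestamp --limit=1
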
-      # Create a Compute Engine virtual machine and attach a cached state disk using the
-      # $CACHED_DISK_NAME variable as the source image to populate the disk cached state
-      - name: Create ${{ inputs.test_id }} GCP compute instance
-        id: create-instance
-        run: |
-          gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
-          --boot-disk-size 300GB \
-          --boot-disk-type pd-ssd \
-          --image-project=cos-cloud \
-          --image-family=cos-stable \
-          --create-disk=image=${{ env.CACHED_DISK_NAME }},name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \
-          --container-image=gcr.io/google-containers/busybox \
-          --machine-type ${{ vars.GCP_LARGE_MACHINE }} \
-          --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
-          --scopes cloud-platform \
-          --metadata=google-monitoring-enabled=TRUE,google-logging-enabled=TRUE \
-          --metadata-from-file=startup-script=.github/workflows/scripts/gcp-vm-startup-script.sh \
-          --labels=app=${{ inputs.app_name }},environment=test,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }},test=${{ inputs.test_id }} \
-          --tags ${{ inputs.app_name }} \
-          --zone ${{ vars.GCP_ZONE }}
-          sleep 60
-
-      # Create a docker volume with the selected cached state.
-      #
-      # SSH into the just-created VM and create a docker volume with the recently attached disk.
-      # (The cached state and disk are usually the same size,
-      # but the cached state can be smaller if we just increased the disk size.)
-      - name: Create ${{ inputs.test_id }} Docker volume
-        run: |
-          gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
-          --zone ${{ vars.GCP_ZONE }} \
-          --ssh-flag="-o ServerAliveInterval=5" \
-          --ssh-flag="-o ConnectionAttempts=20" \
-          --ssh-flag="-o ConnectTimeout=5" \
-          --command \
-          "\
-          sudo docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
-          ${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
-          "
-
-      # Launch the test with the previously created Zebra-only cached state.
-      # Each test runs one of the "Launch test" steps, and skips the other.
-      #
-      # SSH into the just-created VM, and create a Docker container to run the incoming test
-      # from ${{ inputs.test_id }}, then mount the Docker volume created in the previous step.
-      #
-      # The disk mounted in the VM is located at /dev/sdb; we mount the root `/` of this disk to the docker
-      # container in one path:
-      # - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR
-      #
-      # This path must match the variables used by the tests in Rust, which are also set in
-      # `ci-unit-tests-docker.yml`, so these tests can run.
-      #
-      # Although we're mounting the disk root, Zebra will only respect the values from
-      # $ZEBRA_CACHED_STATE_DIR. Inputs like ${{ inputs.zebra_state_dir }} are only used
-      # to match that variable's path.
-      - name: Launch ${{ inputs.test_id }} test
-        # This step only runs for tests that just read or write a Zebra state.
-        #
-        # lightwalletd-full-sync reads Zebra and writes lwd, so it is handled specially.
-        # TODO: we should find better logic for these use cases
-        if: ${{ (inputs.needs_zebra_state && !inputs.needs_lwd_state) && inputs.test_id != 'lwd-full-sync' }}
-        run: |
-          gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
-          --zone ${{ vars.GCP_ZONE }} \
-          --ssh-flag="-o ServerAliveInterval=5" \
-          --ssh-flag="-o ConnectionAttempts=20" \
-          --ssh-flag="-o ConnectTimeout=5" \
-          --command \
-          "\
-          sudo docker run \
-          --name ${{ inputs.test_id }} \
-          --tty \
-          --detach \
-          ${{ inputs.test_variables }} \
-          --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
-          ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
-          "
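The volume-and-mount pattern used above can be sanity checked outside the workflow. The following is a hypothetical local sketch (device path and names invented for illustration), not a workflow step: it formats a raw disk, wraps it in a local Docker volume, and lists the mount destination a test container would see. For a disk that already holds cached state, skip the mkfs line, as the with-cached-state step above does:

    # hypothetical local equivalent of the volume/mount steps above
    sudo mkfs.ext4 -v /dev/sdb
    sudo docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb my-test-cache
    sudo docker run --rm --mount type=volume,src=my-test-cache,dst=/zebrad-cache ubuntu ls -la /zebrad-cache
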
-      # Launch the test with the previously created Lightwalletd and Zebra cached state.
-      # Each test runs one of the "Launch test" steps, and skips the other.
-      #
-      # SSH into the just-created VM, and create a Docker container to run the incoming test
-      # from ${{ inputs.test_id }}, then mount the Docker volume created in the previous step.
-      #
-      # In this step we're using the same disk for simplicity, as mounting multiple disks to the
-      # VM and to the container might require more workflow steps and additional considerations.
-      #
-      # The disk mounted in the VM is located at /dev/sdb; we mount the root `/` of this disk to the docker
-      # container in two different paths:
-      # - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR
-      # - /var/cache/lwd-cache -> ${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} -> $LIGHTWALLETD_DATA_DIR
-      #
-      # This doesn't cause any path conflicts, because Zebra and lightwalletd create different
-      # subdirectories for their data. (But Zebra, lightwalletd, and the test harness must not
-      # delete the whole cache directory.)
-      #
-      # These paths must match the variables used by the tests in Rust, which are also set in
-      # `ci-unit-tests-docker.yml`, so these tests can run.
-      #
-      # Although we're mounting the disk root to both directories, Zebra and Lightwalletd
-      # will only respect the values from $ZEBRA_CACHED_STATE_DIR and $LIGHTWALLETD_DATA_DIR;
-      # inputs like ${{ inputs.lwd_state_dir }} are only used to match those variables' paths.
-      - name: Launch ${{ inputs.test_id }} test
-        # This step only runs for tests that read or write Lightwalletd and Zebra states.
-        #
-        # lightwalletd-full-sync reads Zebra and writes lwd, so it is handled specially.
-        # TODO: we should find better logic for these use cases
-        if: ${{ (inputs.needs_zebra_state && inputs.needs_lwd_state) || inputs.test_id == 'lwd-full-sync' }}
-        run: |
-          gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
-          --zone ${{ vars.GCP_ZONE }} \
-          --ssh-flag="-o ServerAliveInterval=5" \
-          --ssh-flag="-o ConnectionAttempts=20" \
-          --ssh-flag="-o ConnectTimeout=5" \
-          --command \
-          "\
-          sudo docker run \
-          --name ${{ inputs.test_id }} \
-          --tty \
-          --detach \
-          ${{ inputs.test_variables }} \
-          --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
-          --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \
-          ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
-          "
-
-  # Show all the test logs, then follow the logs of the test we just launched, until it finishes.
-  # Then check the result of the test.
-  #
-  # If `inputs.is_long_test` is `true`, the timeout is 5 days, otherwise it's 3 hours.
-  test-result:
-    name: Run ${{ inputs.test_id }} test
-    # We run exactly one of without-cached-state or with-cached-state, and we always skip the other one.
-    needs: [ launch-with-cached-state, launch-without-cached-state ]
-    # If the previous job fails, we also want to run and fail this job,
-    # so that the branch protection rule fails in Mergify and GitHub.
-    if: ${{ !cancelled() }}
-    timeout-minutes: ${{ inputs.is_long_test && 7200 || 180 }}
-    runs-on: zfnd-runners
-    permissions:
-      contents: 'read'
-      id-token: 'write'
-    steps:
-      - uses: actions/checkout@v4.0.0
-        with:
-          persist-credentials: false
-          fetch-depth: '2'
-
-      - name: Inject slug/short variables
-        uses: rlespinasse/github-slug-action@v4
-        with:
-          short-length: 7
-
-      # Install our SSH secret
-      - name: Install private SSH key
-        uses: shimataro/ssh-key-action@v2.5.1
-        with:
-          key: ${{ secrets.GCP_SSH_PRIVATE_KEY }}
-          name: google_compute_engine
-          known_hosts: unnecessary
-
-      - name: Generate public SSH key
-        run: |
-          sudo apt-get update && sudo apt-get -qq install -y --no-install-recommends openssh-client
-          ssh-keygen -y -f ~/.ssh/google_compute_engine > ~/.ssh/google_compute_engine.pub
-
-      # Setup gcloud CLI
-      - name: Authenticate to Google Cloud
-        id: auth
-        uses: google-github-actions/auth@v1.1.1
-        with:
-          retries: '3'
-          workload_identity_provider: '${{ vars.GCP_WIF }}'
-          service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
-
-      - name: Set up Cloud SDK
-        uses: google-github-actions/setup-gcloud@v1.1.1
-
-      # Show all the logs since the container launched,
-      # following until we see zebrad startup messages.
-      #
-      # This check limits the number of log lines, so tests running on the wrong network don't
-      # run until the job times out. If Zebra does a complete recompile, there are a few hundred log
-      # lines before the startup logs. So that's what we use here.
-      #
-      # The log pipeline ignores the exit status of `docker logs`.
-      # It also ignores the expected 'broken pipe' error from `tee`,
-      # which happens when `grep` finds a matching output and moves on to the next job.
-      #
-      # Errors in the tests are caught by the final test status job.
-      - name: Check startup logs for ${{ inputs.test_id }}
-        run: |
-          gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
-          --zone ${{ vars.GCP_ZONE }} \
-          --ssh-flag="-o ServerAliveInterval=5" \
-          --ssh-flag="-o ConnectionAttempts=20" \
-          --ssh-flag="-o ConnectTimeout=5" \
-          --command \
-          "\
-          sudo docker logs \
-          --tail all \
-          --follow \
-          ${{ inputs.test_id }} | \
-          head -700 | \
-          tee --output-error=exit /dev/stderr | \
-          grep --max-count=1 --extended-regexp --color=always \
-          -e 'Zcash network: ${{ inputs.network }}' \
-          "
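The expected 'broken pipe' behavior mentioned above is easy to reproduce in isolation. This is a hedged sketch in plain bash (invented data, not workflow code): `grep --max-count=1` closes the pipe as soon as it matches, and `tee --output-error=exit` then fails on its next write, which is exactly the error the log pipelines here and below treat as expected:

    # grep exits after the first match; tee then fails writing to the closed pipe
    seq 1 1000000 | tee --output-error=exit /dev/null | grep --max-count=1 '^42$'
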
-      # Check that the container executed at least 1 Rust test harness test, and that all tests passed.
-      # Then wait for the container to finish, and exit with the test's exit status.
-      # Also shows all the test logs.
-      #
-      # If the container has already finished, `docker wait` should return its status.
-      # But sometimes this doesn't work, so we use `docker inspect` as a fallback.
-      #
-      # `docker wait` prints the container exit status as a string, but we need to exit the `ssh` command
-      # with that status.
-      # (`docker wait` can also wait for multiple containers, but we only ever wait for a single container.)
-      - name: Result of ${{ inputs.test_id }} test
-        run: |
-          gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
-          --zone ${{ vars.GCP_ZONE }} \
-          --ssh-flag="-o ServerAliveInterval=5" \
-          --ssh-flag="-o ConnectionAttempts=20" \
-          --ssh-flag="-o ConnectTimeout=5" \
-          --command=' \
-            set -e;
-            set -o pipefail;
-            trap '' PIPE;
-
-            sudo docker logs \
-            --tail all \
-            --follow \
-            ${{ inputs.test_id }} | \
-            tee --output-error=exit /dev/stderr | \
-            grep --max-count=1 --extended-regexp --color=always \
-            "test result: .*ok.* [1-9][0-9]* passed.*finished in"; \
-
-            EXIT_STATUS=$( \
-            sudo docker wait ${{ inputs.test_id }} || \
-            sudo docker inspect --format "{{.State.ExitCode}}" ${{ inputs.test_id }} || \
-            echo "missing container, or missing exit status for container" \
-            ); \
-
-            echo "Docker exit status: $EXIT_STATUS"; \
-            exit "$EXIT_STATUS" \
-            '
-
-  # create a state image from the instance's state disk, if requested by the caller
-  create-state-image:
-    name: Create ${{ inputs.test_id }} cached state image
-    runs-on: ubuntu-latest
-    needs: [ test-result, launch-with-cached-state ]
-    # We run exactly one of without-cached-state or with-cached-state, and we always skip the other one.
-    # Normally, if a job is skipped, all the jobs that depend on it are also skipped.
-    # So we need to override the default success() check to make this job run.
-    if: ${{ !cancelled() && !failure() && (inputs.saves_to_disk || inputs.force_save_to_disk) }}
-    permissions:
-      contents: 'read'
-      id-token: 'write'
-    steps:
-      - uses: actions/checkout@v4.0.0
-        with:
-          persist-credentials: false
-          fetch-depth: '2'
-      - uses: r7kamura/rust-problem-matchers@v1.4.0
-
-      - name: Inject slug/short variables
-        uses: rlespinasse/github-slug-action@v4
-        with:
-          short-length: 7
-
-      # Performs formatting on disk name components.
-      #
-      # Disk images in GCP are required to be in lowercase, but the blockchain network
-      # uses sentence case, so we need to downcase ${{ inputs.network }}.
-      #
-      # Disk image names in GCP are limited to 63 characters, so we need to limit
-      # branch names to 12 characters.
-      #
-      # Passes ${{ inputs.network }} to subsequent steps using $NETWORK env variable.
-      # Passes ${{ env.GITHUB_REF_SLUG_URL }} to subsequent steps using $SHORT_GITHUB_REF env variable.
-      - name: Format network name and branch name for disks
-        run: |
-          NETWORK_CAPS="${{ inputs.network }}"
-          echo "NETWORK=${NETWORK_CAPS,,}" >> "$GITHUB_ENV"
-          LONG_GITHUB_REF="${{ env.GITHUB_REF_SLUG_URL }}"
-          echo "SHORT_GITHUB_REF=${LONG_GITHUB_REF:0:12}" >> "$GITHUB_ENV"
-
-      # Install our SSH secret
-      - name: Install private SSH key
-        uses: shimataro/ssh-key-action@v2.5.1
-        with:
-          key: ${{ secrets.GCP_SSH_PRIVATE_KEY }}
-          name: google_compute_engine
-          known_hosts: unnecessary
-
-      - name: Generate public SSH key
-        run: |
-          sudo apt-get update && sudo apt-get -qq install -y --no-install-recommends openssh-client
-          ssh-keygen -y -f ~/.ssh/google_compute_engine > ~/.ssh/google_compute_engine.pub
-
-      # Setup gcloud CLI
-      - name: Authenticate to Google Cloud
-        id: auth
-        uses: google-github-actions/auth@v1.1.1
-        with:
-          workload_identity_provider: '${{ vars.GCP_WIF }}'
-          service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
-
-      - name: Set up Cloud SDK
-        uses: google-github-actions/setup-gcloud@v1.1.1
-
-      # Get the state version from the local constants.rs file to be used in the image creation,
-      # as the state version is part of the disk image name.
-      #
-      # Passes the state version to subsequent steps using $STATE_VERSION env variable
-      - name: Get state version from constants.rs
-        run: |
-          LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" $GITHUB_WORKSPACE/zebra-state/src/constants.rs | grep -oE "[0-9]+" | tail -n1)
-          echo "STATE_VERSION: $LOCAL_STATE_VERSION"
-
-          echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> "$GITHUB_ENV"
-
-      # Sets the $UPDATE_SUFFIX env var to "-u" if updating a previous cached state,
-      # and the empty string otherwise.
-      #
-      # Also sets a unique date and time suffix $TIME_SUFFIX.
-      - name: Set update and time suffixes
-        run: |
-          UPDATE_SUFFIX=""
-
-          if [[ "${{ inputs.needs_zebra_state }}" == "true" ]] && [[ "${{ inputs.app_name }}" == "zebrad" ]]; then
-            UPDATE_SUFFIX="-u"
-          fi
-
-          # TODO: find better logic for the lwd-full-sync case
-          if [[ "${{ inputs.needs_lwd_state }}" == "true" ]] && [[ "${{ inputs.app_name }}" == "lightwalletd" ]] && [[ "${{ inputs.test_id }}" != 'lwd-full-sync' ]]; then
-            UPDATE_SUFFIX="-u"
-          fi
-
-          # We're going to delete old images after a few days, so we only need the time here
-          TIME_SUFFIX=$(date '+%H%M%S' --utc)
-
-          echo "UPDATE_SUFFIX=$UPDATE_SUFFIX" >> "$GITHUB_ENV"
-          echo "TIME_SUFFIX=$TIME_SUFFIX" >> "$GITHUB_ENV"
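As a worked example with hypothetical inputs: an update run of a `zebrad` test finishing at 14:35:02 UTC would produce `UPDATE_SUFFIX=-u` and `TIME_SUFFIX=143502`, yielding an image name that ends in `...-tip-u-143502`:

    # hypothetical values produced by the step above
    UPDATE_SUFFIX="-u"
    TIME_SUFFIX=$(date '+%H%M%S' --utc)   # e.g. 143502
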
-      # Get the full initial and running database versions from the test logs.
-      # These versions are used as part of the disk description and labels.
-      #
-      # If these versions are missing from the logs, the job fails.
-      #
-      # Typically, the database versions are around line 20 in the logs.
-      # But we check the first 1000 log lines, just in case the test harness recompiles all the
-      # dependencies before running the test. (This can happen if the cache is invalid.)
-      #
-      # Passes the versions to subsequent steps using the $INITIAL_DISK_DB_VERSION,
-      # $RUNNING_DB_VERSION, and $DB_VERSION_SUMMARY env variables.
-      - name: Get database versions from logs
-        run: |
-          INITIAL_DISK_DB_VERSION=""
-          RUNNING_DB_VERSION=""
-          DB_VERSION_SUMMARY=""
-
-          DOCKER_LOGS=$( \
-          gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
-          --zone ${{ vars.GCP_ZONE }} \
-          --ssh-flag="-o ServerAliveInterval=5" \
-          --ssh-flag="-o ConnectionAttempts=20" \
-          --ssh-flag="-o ConnectTimeout=5" \
-          --command=" \
-          sudo docker logs ${{ inputs.test_id }} | head -1000 \
-          ")
-
-          # either a semantic version or "creating new database"
-          INITIAL_DISK_DB_VERSION=$( \
-          echo "$DOCKER_LOGS" | \
-          grep --extended-regexp --only-matching 'initial disk state version: [0-9a-z\.]+' | \
-          grep --extended-regexp --only-matching '[0-9a-z\.]+' | \
-          tail -1 || \
-          [[ $? == 1 ]] \
-          )
-
-          if [[ -z "$INITIAL_DISK_DB_VERSION" ]]; then
-            echo "Checked logs:"
-            echo ""
-            echo "$DOCKER_LOGS"
-            echo ""
-            echo "Missing initial disk database version in logs: $INITIAL_DISK_DB_VERSION"
-            # Fail the tests, because Zebra didn't log the initial disk database version,
-            # or the regex in this step is wrong.
-            false
-          fi
-
-          if [[ "$INITIAL_DISK_DB_VERSION" = "creating.new.database" ]]; then
-            INITIAL_DISK_DB_VERSION="new"
-          else
-            INITIAL_DISK_DB_VERSION="v${INITIAL_DISK_DB_VERSION//./-}"
-          fi
-
-          echo "Found initial disk database version in logs: $INITIAL_DISK_DB_VERSION"
-          echo "INITIAL_DISK_DB_VERSION=$INITIAL_DISK_DB_VERSION" >> "$GITHUB_ENV"
-
-          RUNNING_DB_VERSION=$( \
-          echo "$DOCKER_LOGS" | \
-          grep --extended-regexp --only-matching 'running state version: [0-9\.]+' | \
-          grep --extended-regexp --only-matching '[0-9\.]+' | \
-          tail -1 || \
-          [[ $? == 1 ]] \
-          )
-
-          if [[ -z "$RUNNING_DB_VERSION" ]]; then
-            echo "Checked logs:"
-            echo ""
-            echo "$DOCKER_LOGS"
-            echo ""
-            echo "Missing running database version in logs: $RUNNING_DB_VERSION"
-            # Fail the tests, because Zebra didn't log the running database version,
-            # or the regex in this step is wrong.
-            false
-          fi
-
-          RUNNING_DB_VERSION="v${RUNNING_DB_VERSION//./-}"
-          echo "Found running database version in logs: $RUNNING_DB_VERSION"
-          echo "RUNNING_DB_VERSION=$RUNNING_DB_VERSION" >> "$GITHUB_ENV"
-
-          if [[ "$INITIAL_DISK_DB_VERSION" = "$RUNNING_DB_VERSION" ]]; then
-            DB_VERSION_SUMMARY="$RUNNING_DB_VERSION"
-          elif [[ "$INITIAL_DISK_DB_VERSION" = "new" ]]; then
-            DB_VERSION_SUMMARY="$RUNNING_DB_VERSION in new database"
-          else
-            DB_VERSION_SUMMARY="$INITIAL_DISK_DB_VERSION changing to $RUNNING_DB_VERSION"
-          fi
-
-          echo "Summarised database versions from logs: $DB_VERSION_SUMMARY"
-          echo "DB_VERSION_SUMMARY=$DB_VERSION_SUMMARY" >> "$GITHUB_ENV"
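The version munging in the step above turns a dotted version from the logs into a label-safe string using bash pattern substitution (`${VAR//./-}` replaces every `.` with `-`). A short sketch with a hypothetical version number:

    # hypothetical log line:  initial disk state version: 25.3.0
    V="25.3.0"
    echo "v${V//./-}"   # prints: v25-3-0
    # the literal 'creating.new.database' is special-cased to the label 'new' instead
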
-      # Get the sync height from the test logs, which is later used as part of the
-      # disk description and labels.
-      #
-      # The regex used to grep the sync height is provided by ${{ inputs.height_grep_text }};
-      # this allows the height regex to be changed dynamically as needed by different situations,
-      # or based on the log output from different tests.
-      #
-      # If the sync height is missing from the logs, the job fails.
-      #
-      # Passes the sync height to subsequent steps using $SYNC_HEIGHT env variable.
-      - name: Get sync height from logs
-        run: |
-          SYNC_HEIGHT=""
-
-          DOCKER_LOGS=$( \
-          gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
-          --zone ${{ vars.GCP_ZONE }} \
-          --ssh-flag="-o ServerAliveInterval=5" \
-          --ssh-flag="-o ConnectionAttempts=20" \
-          --ssh-flag="-o ConnectTimeout=5" \
-          --command=" \
-          sudo docker logs ${{ inputs.test_id }} --tail 200 \
-          ")
-
-          SYNC_HEIGHT=$( \
-          echo "$DOCKER_LOGS" | \
-          grep --extended-regexp --only-matching '${{ inputs.height_grep_text }}[0-9]+' | \
-          grep --extended-regexp --only-matching '[0-9]+' | \
-          tail -1 || \
-          [[ $? == 1 ]] \
-          )
-
-          if [[ -z "$SYNC_HEIGHT" ]]; then
-            echo "Checked logs:"
-            echo ""
-            echo "$DOCKER_LOGS"
-            echo ""
-            echo "Missing sync height in logs: $SYNC_HEIGHT"
-            # Fail the tests, because Zebra and lightwalletd didn't log their sync heights,
-            # or the CI workflow sync height regex is wrong.
-            false
-          fi
-
-          echo "Found sync height in logs: $SYNC_HEIGHT"
-          echo "SYNC_HEIGHT=$SYNC_HEIGHT" >> "$GITHUB_ENV"
-
-      # Get the original cached state height from Google Cloud.
-      #
-      # If the height is missing from the image labels, uses zero instead.
-      #
-      # TODO: fail the job if needs_zebra_state but the height is missing.
-      #       We can make this change after all the old images have been deleted; this should happen around 15 September 2022.
-      #       We'll also need to do a manual checkpoint rebuild before opening the PR for this change.
-      #
-      # Passes the original height to subsequent steps using $ORIGINAL_HEIGHT env variable.
-      - name: Get original cached state height from Google Cloud
-        run: |
-          ORIGINAL_HEIGHT="0"
-          ORIGINAL_DISK_NAME="${{ format('{0}', needs.launch-with-cached-state.outputs.cached_disk_name) }}"
-
-          if [[ -n "$ORIGINAL_DISK_NAME" ]]; then
-            ORIGINAL_HEIGHT=$(gcloud compute images list --filter="status=READY AND name=$ORIGINAL_DISK_NAME" --format="value(labels.height)")
-            ORIGINAL_HEIGHT=${ORIGINAL_HEIGHT:-0}
-            echo "$ORIGINAL_DISK_NAME height: $ORIGINAL_HEIGHT"
-          else
-            ORIGINAL_DISK_NAME="new-disk"
-            echo "newly created disk, original height set to 0"
-          fi
-
-          echo "ORIGINAL_HEIGHT=$ORIGINAL_HEIGHT" >> "$GITHUB_ENV"
-          echo "ORIGINAL_DISK_NAME=$ORIGINAL_DISK_NAME" >> "$GITHUB_ENV"
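The next step only saves an updated image when the sync made enough progress. As a worked example with hypothetical numbers: with CACHED_STATE_UPDATE_LIMIT=576 (roughly half a day of blocks, given the approximately 1152 blocks per day noted in the env section) and an original height of 2,000,000, an update image is only created once the sync height passes 2,000,576:

    # hypothetical numbers for the threshold check in the next step
    ORIGINAL_HEIGHT=2000000
    CACHED_STATE_UPDATE_LIMIT=576
    MINIMUM_UPDATE_HEIGHT=$((ORIGINAL_HEIGHT + CACHED_STATE_UPDATE_LIMIT))   # 2000576
    SYNC_HEIGHT=2000600
    [[ "$SYNC_HEIGHT" -gt "$MINIMUM_UPDATE_HEIGHT" ]] && echo "save an updated image"
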
-      # Create an image from the state disk, which will be used for any tests that start
-      # after it is created. These tests can be in the same workflow, or in a different PR.
-      #
-      # Using the newest image makes future jobs faster, because it is closer to the chain tip.
-      #
-      # Skips creating updated images if the original image is less than $CACHED_STATE_UPDATE_LIMIT behind the current tip.
-      # Full sync images are always created.
-      #
-      # The image can contain:
-      # - Zebra cached state, or
-      # - Zebra + lightwalletd cached state.
-      # The cached state that is saved to the disk is determined by ${{ inputs.disk_prefix }}.
-      #
-      # Google Cloud doesn't have an atomic image replacement operation.
-      # We don't want to delete and re-create the image, because that causes a ~5 minute
-      # window where there might be no recent image. So we add an extra image with a unique name,
-      # which gets selected because it has a later creation time.
-      # This also simplifies the process of deleting old images,
-      # because we don't have to worry about accidentally deleting all the images.
-      #
-      # The timestamp makes images from the same commit unique,
-      # as long as they don't finish in the same second.
-      # (This is unlikely, because each image created by a workflow has a different name.)
-      #
-      # The image name must also be 63 characters or less.
-      #
-      # Force the image creation (--force) as the disk is still attached, even though it is not
-      # being used by the container.
-      - name: Create image from state disk
-        run: |
-          MINIMUM_UPDATE_HEIGHT=$((ORIGINAL_HEIGHT+CACHED_STATE_UPDATE_LIMIT))
-          if [[ -z "$UPDATE_SUFFIX" ]] || [[ "$SYNC_HEIGHT" -gt "$MINIMUM_UPDATE_HEIGHT" ]] || [[ "${{ inputs.force_save_to_disk }}" == "true" ]]; then
-            gcloud compute images create \
-              "${{ inputs.disk_prefix }}-${SHORT_GITHUB_REF}-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${NETWORK}-${{ inputs.disk_suffix }}${UPDATE_SUFFIX}-${TIME_SUFFIX}" \
-              --force \
-              --source-disk=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
-              --source-disk-zone=${{ vars.GCP_ZONE }} \
-              --storage-location=us \
-              --description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }} and database format ${{ env.DB_VERSION_SUMMARY }}" \
-              --labels="height=${{ env.SYNC_HEIGHT }},purpose=${{ inputs.disk_prefix }},commit=${{ env.GITHUB_SHA_SHORT }},state-version=${{ env.STATE_VERSION }},state-running-version=${RUNNING_DB_VERSION},initial-state-disk-version=${INITIAL_DISK_DB_VERSION},network=${NETWORK},target-height-kind=${{ inputs.disk_suffix }},update-flag=${UPDATE_SUFFIX},force-save=${{ inputs.force_save_to_disk }},updated-from-height=${ORIGINAL_HEIGHT},updated-from-disk=${ORIGINAL_DISK_NAME},test-id=${{ inputs.test_id }},app-name=${{ inputs.app_name }}"
-          else
-            echo "Skipped cached state update because the new sync height $SYNC_HEIGHT was less than $CACHED_STATE_UPDATE_LIMIT blocks above the original height $ORIGINAL_HEIGHT of $ORIGINAL_DISK_NAME"
-          fi
-
-  # delete the Google Cloud instance for this test
-  delete-instance:
-    name: Delete ${{ inputs.test_id }} instance
-    runs-on: ubuntu-latest
-    needs: [ create-state-image ]
-    # If a disk generation step times out (after 6+ hours), the previous job (creating the image) will be skipped.
-    # Even if the instance continues running, no image will be created, so it's better to delete it.
-    if: always()
-    continue-on-error: true
-    permissions:
-      contents: 'read'
-      id-token: 'write'
-    steps:
-      - uses: actions/checkout@v4.0.0
-        with:
-          persist-credentials: false
-          fetch-depth: '2'
-      - uses: r7kamura/rust-problem-matchers@v1.4.0
-
-      - name: Inject slug/short variables
-        uses: rlespinasse/github-slug-action@v4
-        with:
-          short-length: 7
-
-      # Setup gcloud CLI
-      - name: Authenticate to Google Cloud
-        id: auth
-        uses: google-github-actions/auth@v1.1.1
-        with:
-          workload_identity_provider: '${{ vars.GCP_WIF }}'
-          service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
-
-      - name: Set up Cloud SDK
-        uses: google-github-actions/setup-gcloud@v1.1.1
-
-      # Deletes the instance that was recently deployed for the current commit, after all
-      # previous jobs have run, no matter their outcome.
-      - name: Delete test instance
-        continue-on-error: true
-        run: |
-          INSTANCE=$(gcloud compute instances list --filter=${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
-          if [ -z "${INSTANCE}" ]; then
-            echo "No instance to delete"
-          else
-            gcloud compute instances delete "${INSTANCE}" --zone "${{ vars.GCP_ZONE }}" --delete-disks all --quiet
-          fi
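Every job in this workflow addresses the VM by the same composed name, which is why the final cleanup can find it by filter alone. A hypothetical example of the convention (test id `lwd-full-sync`, branch slug `my-branch`, short commit `a1b2c3d`):

    # hypothetical instance name used by the create, ssh, and delete commands:
    #   lwd-full-sync-my-branch-a1b2c3d
    gcloud compute instances list --filter=lwd-full-sync-my-branch-a1b2c3d --format='value(NAME)'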