-
-
Notifications
You must be signed in to change notification settings - Fork 747
218 lines (204 loc) · 8.4 KB
/
orquesta-integration-tests.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# We run orquesta integration tests as part of a separate workflow.
# Orquesta tests have a lot of race conditions which result in intermediate failures and timeouts.
# Utilizing separate workflow allows us to re-run just this workflow / job on failure instead of
# wasting time and resources by needing to re-run all the jobs.
name: Orquesta CI
on:
push:
branches:
# only on merges to master branch
- master
# and version branches, which only include minor versions (eg: v3.4)
- v[0-9]+.[0-9]+
tags:
# also version tags, which include bugfix releases (eg: v3.4.0)
- v[0-9]+.[0-9]+.[0-9]+
pull_request:
types: [opened, reopened, synchronize]
branches:
# Only for PRs targeting those branches
- master
- v[0-9]+.[0-9]+
schedule:
# run every night at midnight
- cron: '0 0 * * *'
jobs:
# TODO: Fix the required checks!
# When the pre_job triggers and skips builds, it prevents merging the PR because
# the required checks are reported as skipped instead of passed.
# Special job which automatically cancels old runs for the same branch, prevents runs for the
# same file set which has already passed, etc.
pre_job:
name: Skip Duplicate Jobs Pre Job
runs-on: ubuntu-20.04
outputs:
should_skip: ${{ steps.skip_check.outputs.should_skip }}
steps:
- id: skip_check
uses: fkirc/skip-duplicate-actions@4c656bbdb6906310fa6213604828008bc28fe55d # v3.3.0
with:
cancel_others: 'true'
github_token: ${{ github.token }}
integration-tests:
needs: pre_job
# NOTE: We always want to run job on master since we run some additional checks there (code
# coverage, etc)
# if: ${{ needs.pre_job.outputs.should_skip != 'true' || github.ref == 'refs/heads/master' }}
name: '${{ matrix.make.name }} - Python ${{ matrix.python.version-short }}'
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
# NOTE: We need to use full Python version as part of Python deps cache key otherwise
# setup virtualenv step will fail.
python:
- {version-short: '3.8', version: '3.8.10'}
- {version-short: '3.9', version: '3.9.14'}
- {version-short: '3.10', version: '3.10.15'}
- {version-short: '3.11', version: '3.11.10'}
make:
- name: 'Integration Tests (Orquesta)'
task: 'ci-orquesta'
shard: {k: 0, n: 1}
services:
mongo:
image: mongo:7.0
ports:
- 27017:27017
rabbitmq:
image: rabbitmq:3.8-management
options: >-
--name rabbitmq
ports:
- 5671:5671/tcp # AMQP SSL port
- 5672:5672/tcp # AMQP standard port
- 15672:15672/tcp # Management: HTTP, CLI
redis:
# Docker Hub image
image: redis
# Set health checks to wait until redis has started
options: >-
--name "redis"
--health-cmd "redis-cli ping"
--health-interval 10s
--health-timeout 5s
--health-retries 5
ports:
- 6379:6379/tcp
env:
TASK: '${{ matrix.make.task }}'
NODE_TOTAL: '${{ matrix.make.shard.n }}'
NODE_INDEX: '${{ matrix.make.shard.k }}'
# We need to explicitly specify terminal width otherwise some CLI tests fail on container
# environments where small terminal size is used.
COLUMNS: '120'
# CI st2.conf (with ST2_CI_USER user instead of stanley)
ST2_CONF: 'conf/st2.ci.conf'
# Tell StackStorm that we are indeed in CI mode, previously we hard coded a Travis specific
# environment variable in our test code, making it a PITA when we switch CI providers.
# Now, we simply set this environment varible here in the CI portion of our testing and
# it avoids any CI provider type lock-in.
ST2_CI: 'true'
# Name of the user who is running the CI (on GitHub Actions this is 'runner')
ST2_CI_USER: 'runner'
# GitHub is juggling how to set vars for multiple shells. Protect our PATH assumptions.
PATH: /home/runner/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Custom Environment Setup
run: |
./scripts/github/setup-environment.sh
- name: 'Set up Python (${{ matrix.python.version }}) and Cache Deps'
uses: ./.github/actions/setup-python
with:
python-version: '${{ matrix.python.version }}'
- name: Cache and Install APT Dependencies
uses: ./.github/actions/apt-packages
- name: Install virtualenv
run: |
./scripts/github/install-virtualenv.sh
- name: Install requirements
run: |
./scripts/ci/install-requirements.sh
- name: Setup Integration Tests
run: |
# prep a ci-specific dev conf file that uses runner instead of stanley
# this user is the username of the user in GitHub actions, used for SSH, etc during
# integration tests (important)
cp conf/st2.dev.conf "${ST2_CONF}" ; sed -i -e "s/stanley/${ST2_CI_USER}/" "${ST2_CONF}"
sudo -E ./scripts/ci/add-itest-user-key.sh
- name: Permissions Workaround
run: |
echo "$ST2_CI_REPO_PATH"
sudo ST2_CI_REPO_PATH="${ST2_CI_REPO_PATH}" scripts/ci/permissions-workaround.sh
- name: Print versions
run: |
./scripts/ci/print-versions.sh
- name: make
timeout-minutes: 41
env:
MAX_ATTEMPTS: 3
RETRY_DELAY: 5
# use: script -e -c to print colors
run: |
# There is a race in some orequesta integration tests so they tend to fail quite often.
# To avoid needed to re-run whole workflow in such case, we should try to retry this
# specific step. This saves us a bunch of time manually re-running the whole workflow.
# TODO: Try to identify problematic tests (iirc mostly orquesta ones) and only retry /
# re-run those.
set +e
for i in $(seq 1 ${MAX_ATTEMPTS}); do
echo "Attempt: ${i}/${MAX_ATTEMPTS}"
script -e -c "timeout 10m make ${TASK}" && exit 0
exit_code=$?
echo "Command failed / timed out (exit_code=${exit_code}), will retry in ${RETRY_DELAY} seconds..."
sleep ${RETRY_DELAY}
done
set -e
echo "Failed after ${MAX_ATTEMPTS} attempts, failing the job."
exit 1
- name: Compress Service Logs Before upload
if: ${{ failure() }}
run: |
./tools/launchdev.sh stop # stop st2 before collecting logs
tar cvzpf logs.tar.gz logs/*
- name: Upload StackStorm services Logs
if: ${{ failure() }}
uses: actions/upload-artifact@v4
with:
name: logs-py${{ matrix.python.version }}
path: logs.tar.gz
retention-days: 7
slack-notification:
name: Slack notification for failed master builds
if: always()
needs:
- integration-tests
runs-on: ubuntu-20.04
steps:
- name: Workflow conclusion
# this step creates an environment variable WORKFLOW_CONCLUSION and is the most reliable way to check the status of previous jobs
uses: technote-space/workflow-conclusion-action@v2
- name: CI Run Failure Slack Notification
if: ${{ env.WORKFLOW_CONCLUSION == 'failure' && github.ref == 'refs/heads/master' }}
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
uses: voxmedia/github-action-slack-notify-build@v1
with:
channel: development
status: FAILED
color: danger
# HELPER FOR FUTURE DEVELOPERS:
# If your GitHub Actions job is failing and you need to debug it, by default there is
# no way to SSH into the container.
# The step below can be uncommeted and will stop here and allow you to SSH in.
# When this step is reached, simply refresh the GitHub Actions output for this build
# and this SSH command will be printed every 5 seconds to the output.
# Once you are done debugging in your SSH session, simply: touch /continue
# and this will continue the build.
#
# - name: Setup tmate session for debugging failed jobs (allows SSH into the container)
# uses: mxschmitt/action-tmate@v3
# if: "${{ failure() }}"
#