# File: integration-aws-nvidia-oss-cron.yaml
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-05-23T10:20:45Z by kres 2688b70.
name: integration-aws-nvidia-oss-cron

# One run per concurrency group; a newer run in the same group cancels the
# one in progress. `github.head_ref` is only set for pull-request events, so
# for the scheduled trigger below the group falls back to the run id.
concurrency:
  group: ${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

# The key is quoted so generic YAML 1.1 parsers do not read `on` as a boolean.
"on":
  schedule:
    - cron: "30 7 * * *"

jobs:
  default:
    runs-on:
      - self-hosted
      - generic
    steps:
      # --- Runner diagnostics (best effort: failures do not fail the job) ---
      - name: gather-system-info
        id: system-info
        # NOTE(review): this reference was mangled by e-mail obfuscation in the
        # copy this file was recovered from; the action name is restored from
        # the `system-info` outputs used below, but the pinned version tag
        # should be confirmed against the generator's template.
        uses: kenchan0130/actions-system-info@v1.3.0
        continue-on-error: true
      - name: print-system-info
        run: |
          MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024))
          OUTPUTS=(
            "CPU Core: ${{ steps.system-info.outputs.cpu-core }}"
            "CPU Model: ${{ steps.system-info.outputs.cpu-model }}"
            "Hostname: ${{ steps.system-info.outputs.hostname }}"
            "NodeName: ${NODE_NAME}"
            "Kernel release: ${{ steps.system-info.outputs.kernel-release }}"
            "Kernel version: ${{ steps.system-info.outputs.kernel-version }}"
            "Name: ${{ steps.system-info.outputs.name }}"
            "Platform: ${{ steps.system-info.outputs.platform }}"
            "Release: ${{ steps.system-info.outputs.release }}"
            "Total memory: ${MEMORY_GB} GB"
          )
          for OUTPUT in "${OUTPUTS[@]}";do
            echo "${OUTPUT}"
          done
        continue-on-error: true

      # --- Source checkout and build tooling ---
      - name: checkout
        uses: actions/checkout@v4
      - name: Unshallow
        run: |
          git fetch --prune --unshallow
      - name: Set up Docker Buildx
        id: setup-buildx
        uses: docker/setup-buildx-action@v3
        with:
          driver: remote
          endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234
        timeout-minutes: 10

      # --- Secrets: mask every decrypted value first, then export them ---
      - name: Mask secrets
        run: |
          echo -e "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')"
      - name: Set secrets for job
        run: |
          sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV"

      # --- Artifacts: reuse when not scheduled, build from source when scheduled ---
      # The only trigger declared in this workflow is `schedule`, so the two
      # `!= 'schedule'` steps below are skipped here and the build steps run.
      - name: Download artifacts
        if: github.event_name != 'schedule'
        uses: actions/download-artifact@v4
        with:
          name: artifacts
          path: _out
      - name: Fix artifact permissions
        if: github.event_name != 'schedule'
        run: |
          xargs -a _out/executable-artifacts -I {} chmod +x {}
      - name: build
        if: github.event_name == 'schedule'
        env:
          IMAGE_REGISTRY: registry.dev.siderolabs.io
          PLATFORM: linux/amd64,linux/arm64
          PUSH: "true"
        run: |
          make talosctl-linux-amd64 kernel sd-boot sd-stub initramfs installer imager talos _out/integration-test-linux-amd64
      - name: talosctl-cni-bundle
        if: github.event_name == 'schedule'
        run: |
          make talosctl-cni-bundle
      - name: iso
        if: github.event_name == 'schedule'
        env:
          IMAGE_REGISTRY: registry.dev.siderolabs.io
        run: |
          make iso secureboot-iso
      - name: images-essential
        if: github.event_name == 'schedule'
        env:
          IMAGE_REGISTRY: registry.dev.siderolabs.io
          PLATFORM: linux/amd64,linux/arm64
        run: |
          make images-essential

      # --- NVIDIA extensions, built from the extensions repo's main branch ---
      - name: checkout extensions
        uses: actions/checkout@v4
        with:
          path: _out/extensions
          ref: main
          repository: siderolabs/extensions
      - name: set variables
        # Appends the contents of _out/talos-metadata to the job environment.
        run: |
          cat _out/talos-metadata >> "$GITHUB_ENV"
      - name: build extensions
        env:
          PLATFORM: linux/amd64
          PUSH: "true"
          REGISTRY: registry.dev.siderolabs.io
        run: |
          make nvidia-container-toolkit nvidia-open-gpu-kernel-modules extensions-metadata -C _out/extensions

      # --- AWS end-to-end run ---
      - name: e2e-aws-prepare
        env:
          E2E_AWS_TARGET: nvidia-oss
          EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata
          IMAGE_REGISTRY: registry.dev.siderolabs.io
        run: |
          make e2e-aws-prepare
      - name: checkout contrib
        uses: actions/checkout@v4
        with:
          path: _out/contrib
          ref: main
          repository: siderolabs/contrib
      - name: setup tf
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_wrapper: "false"
      - name: tf apply
        env:
          TF_E2E_ACTION: apply
          TF_E2E_TEST_TYPE: aws
          TF_SCRIPT_DIR: _out/contrib
        run: |
          make e2e-cloud-tf
      - name: e2e-aws-nvidia-oss
        env:
          EXTRA_TEST_ARGS: -talos.extensions.nvidia
          TEST_NUM_NODES: "4"
        run: |
          make e2e-aws
      - name: tf destroy
        # `always()` makes the destroy step run even when an earlier step
        # failed or the run was cancelled.
        if: always()
        env:
          TF_E2E_ACTION: destroy
          TF_E2E_TEST_TYPE: aws
          TF_SCRIPT_DIR: _out/contrib
        run: |
          make e2e-cloud-tf