-
Notifications
You must be signed in to change notification settings - Fork 1
/
sno.yml
233 lines (203 loc) · 7.71 KB
/
sno.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
---
# Play 1: runs on the control node (localhost). It makes sure the local
# scratch directory exists and then applies the `vsphere` role, handing it
# that directory and the private-network flag.
- name: Single-Node OpenShift (SNO) with NVIDIA vGPU on VMware
  hosts: localhost
  gather_facts: false

  pre_tasks:
    - name: Ensure local temp directory '{{ local_temp_dir }}' exists
      ansible.builtin.file:
        state: directory
        path: '{{ local_temp_dir }}'

  roles:
    - role: vsphere
      vars:
        temp_directory: '{{ local_temp_dir }}'
        # `use_private_networks` is optional; it is treated as false when unset.
        private_networks: '{{ use_private_networks | default(false) | bool }}'
# Play 2: creates the assisted-installer SNO cluster, boots a VM from the
# discovery ISO, attaches a vGPU profile, installs OpenShift and finally the
# GPU Operator. The play targets `bastion` when `use_private_networks` is
# truthy, otherwise `localhost`.
- name: Deploy the SNO cluster and install the GPU Operator
  hosts: "{{ 'bastion' if use_private_networks | default(false) | bool else 'localhost' }}"
  gather_facts: false

  vars:
    private_networks: "{{ use_private_networks | default(false) | bool }}"
    # Working directory on the play host: a fixed path on the bastion, or the
    # local temp directory when everything runs on localhost.
    remote_temp_dir: "{{ '/root/openshift-cluster' if private_networks else local_temp_dir }}"
    vgpu_profile: "grid_v100dx-32c"  # Must be a CUDA-supported vGPU type
    oc_client_binary: "https://mirror.openshift.com/pub/openshift-v4/x86_64/clients/ocp/stable/openshift-client-linux.tar.gz"
    # vCenter connection details are read from localhost's host variables.
    # NOTE(review): presumably populated by the first play - confirm.
    vcenter_ip: "{{ hostvars.localhost.vcenter_ip }}"
    vcenter_username: "{{ hostvars.localhost.vcenter_username }}"
    vcenter_password: "{{ hostvars.localhost.vcenter_password }}"

  pre_tasks:
    # Only needed on the bastion; the local directory is handled separately.
    - name: Ensure remote temp directory '{{ remote_temp_dir }}' exists
      ansible.builtin.file:
        path: "{{ remote_temp_dir }}"
        state: directory
      when: private_networks

  tasks:
    # Some tasks must always run on the bastion, whichever host the play
    # targets, so the roles below are applied with `delegate_to: bastion`.
    - name: Run the esxi_connections role on the bastion
      ansible.builtin.include_role:
        name: esxi_connections
        apply:
          delegate_to: bastion

    - name: Run the vgpu_driver role on the bastion
      ansible.builtin.include_role:
        name: vgpu_driver
        apply:
          delegate_to: bastion

    - name: Run the host_graphics role on the bastion
      ansible.builtin.include_role:
        name: host_graphics
        apply:
          delegate_to: bastion

    - name: Install Python prerequisites
      ansible.builtin.pip:
        name:
          - "aicli>=99.0.202210301623.202103111306"
          - "assisted-service-client>=2.9.0.post12"
        state: present

    # openssh_keypair is provided by the community.crypto collection; the
    # canonical FQCN is used instead of the ansible.builtin redirect.
    - name: Generate a cluster SSH key pair
      community.crypto.openssh_keypair:
        path: "{{ remote_temp_dir }}/{{ openshift_cluster_name }}_ssh_rsa"
        size: 2048
      register: ssh_key_pair

    - name: Create an assisted SNO cluster
      karmab.aicli.ai_cluster:
        name: "{{ openshift_cluster_name }}"
        state: present
        parameters:
          openshift_version: "{{ openshift_version }}"
          sno: true
          minimal: true
          pull_secret: "{{ lookup('file', pull_secret_path) | string }}"
          base_dns_domain: "{{ openshift_base_domain }}"
          ssh_public_key: "{{ ssh_key_pair.public_key }}"
          tags: "{{ cluster_tags }}"
        offlinetoken: "{{ ocm_offline_token }}"

    - name: Read SNO cluster's InfraEnv
      karmab.aicli.ai_infraenv_info:
        name: "{{ openshift_cluster_name }}"
        offlinetoken: "{{ ocm_offline_token }}"
      register: sno_infraenv

    # NOTE(review): `dest` is relative and its parent directory `bootstrap`
    # must already exist on the bastion - presumably created by one of the
    # roles above; confirm.
    - name: Copy script for downloading assisted installer ISO
      ansible.builtin.copy:
        src: download_iso.py
        dest: bootstrap/download_iso.py
        mode: "u=rwx,g=r,o=r"
      delegate_to: bastion

    # TODO: Avoid downloading if the file is busy - used by an existing VM
    - name: Download assisted installer ISO for SNO cluster
      ansible.builtin.command:
        cmd: >-
          python3 $HOME/bootstrap/download_iso.py
          {{ sno_infraenv.download_url }}
          {{ openshift_cluster_name }}.iso
        chdir: bootstrap
      register: copy_result
      # A non-zero exit with "Device or resource busy" is tolerated below, so
      # only a clean exit is reported as a change.
      changed_when: copy_result.rc == 0
      # ignore when the file is locked by an existing VM
      failed_when: "copy_result.rc != 0 and 'Device or resource busy' not in copy_result.stderr"
      delegate_to: bastion

    # The VM boots from the ISO named after the cluster on datastore1.
    # NOTE(review): assumes the bastion's `bootstrap` directory is backed by
    # that datastore - confirm.
    - name: Create a VM for SNO
      community.vmware.vmware_guest:
        hostname: "{{ vcenter_ip }}"
        username: "{{ vcenter_username }}"
        password: "{{ vcenter_password }}"
        folder: /
        datacenter: Metal
        name: "{{ vm_name }}"
        state: present
        guest_id: rhel8_64Guest
        disk:
          - size_gb: 180
            type: thin
            datastore: datastore1
        networks:
          - name: "{{ 'VM Private Net' if private_networks else 'VM Public Net 1' }}"
            start_connected: true
        cdrom:
          - controller_number: 0
            unit_number: 0
            state: present
            type: iso
            iso_path: "[datastore1] {{ openshift_cluster_name }}.iso"
        hardware:
          memory_mb: 32768
          num_cpus: 8
          scsi: paravirtual
          boot_firmware: "efi"
        advanced_settings:
          - key: "pciPassthru.use64bitMMIO"
            value: "TRUE"
          - key: "pciPassthru.64bitMMIOSizeGB"
            value: "512"
        wait_for_ip_address: true
        validate_certs: false
      register: vm

    - name: Power off the VM before adding vGPU
      community.vmware.vmware_guest_powerstate:
        hostname: "{{ vcenter_ip }}"
        username: "{{ vcenter_username }}"
        password: "{{ vcenter_password }}"
        folder: /
        datacenter: Metal
        name: "{{ vm_name }}"
        state: powered-off
        validate_certs: false

    - name: Add a vGPU profile to the VM
      community.vmware.vmware_guest_vgpu:
        hostname: "{{ vcenter_ip }}"
        username: "{{ vcenter_username }}"
        password: "{{ vcenter_password }}"
        folder: /
        datacenter: Metal
        name: "{{ vm_name }}"
        state: present
        vgpu: "{{ vgpu_profile }}"
        validate_certs: false

    - name: Print SNO host connection parameters
      ansible.builtin.debug:
        msg:
          - "You can now SSH to the OpenShift node: ssh -i {{ ssh_key_pair.filename }} core@{{ vm.instance.hw_eth0.ipaddresses[0] }}"

    # Polls up to 10 times, 60 seconds apart. The loop also ends immediately
    # when the cluster already reports `installed`, which makes re-runs pass.
    - name: Wait for exactly one disconnected (down) host in the cluster
      karmab.aicli.ai_cluster_info:
        name: "{{ openshift_cluster_name }}"
        offlinetoken: "{{ ocm_offline_token }}"
      register: assisted_cluster
      until: >-
        assisted_cluster.status == 'installed'
        or (assisted_cluster.hosts | length == 1
        and assisted_cluster.hosts[0].status == 'disconnected')
      retries: 10
      delay: 60

    - name: Power on the VM after adding vGPU
      community.vmware.vmware_guest_powerstate:
        hostname: "{{ vcenter_ip }}"
        username: "{{ vcenter_username }}"
        password: "{{ vcenter_password }}"
        folder: /
        datacenter: Metal
        name: "{{ vm_name }}"
        state: powered-on
        validate_certs: false

    # Skipped once the cluster is installed.
    - name: Set hostname (instead of localhost)
      karmab.aicli.ai_host:
        name: "{{ assisted_cluster.hosts[0].id }}"
        state: present
        parameters:
          requested_hostname: "{{ openshift_node_hostname }}"
        offlinetoken: "{{ ocm_offline_token }}"
      when: assisted_cluster.status != 'installed'

    - name: Install OpenShift cluster
      ansible.builtin.include_role:
        name: empovit.assisted_openshift.cluster_installation
      vars:
        cluster_name: "{{ openshift_cluster_name }}"
        temp_directory: "{{ remote_temp_dir }}"

    # NOTE(review): the VIPs come from `assisted_cluster`, which was registered
    # before installation started - confirm they are already populated then.
    - name: Update DNS entries
      ansible.builtin.include_role:
        name: empovit.assisted_openshift.etc_hosts
      vars:
        api_vip: "{{ assisted_cluster.api_vip }}"
        ingress_vip: "{{ assisted_cluster.ingress_vip }}"
        cluster_name: "{{ openshift_cluster_name }}"

    # conflicts with pip-installed PyYAML
    - name: Uninstall PyYAML installed by distutils
      ansible.builtin.package:
        name: python3-yaml
        use: apt
        state: absent
      when: private_networks

    # `pipefail` makes a failed download fail the task rather than relying on
    # tar to notice bad input; it needs bash, hence the explicit executable.
    - name: Install oc binary on the bastion
      ansible.builtin.shell:
        cmd: >-
          set -o pipefail &&
          curl -L -o - {{ oc_client_binary }} | tar -C /usr/local/bin -xvzf - oc
        executable: /bin/bash
      # The archive is fetched and unpacked on every run.
      changed_when: true
      when: private_networks

    - name: Install GPU Operator
      ansible.builtin.include_role:
        name: empovit.gpu_operator
      vars:
        openshift_api_vip: "{{ assisted_cluster.api_vip }}"
        gpu_type: vgpu