Skip to content

Commit

Permalink
scale deployment fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
karmab committed May 24, 2024
1 parent a29c625 commit 9db42c9
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 17 deletions.
5 changes: 5 additions & 0 deletions .github/scale_parameters.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Parameter file consumed by `aicli scale deployment --pf .github/scale_parameters.yml`.
# Defines the BMC credentials plus the extra host(s) to add to the existing cluster.
bmc_user: admin
bmc_password: password
# Each entry names a node and its Redfish endpoint (served by kcli's sushy service
# created in the workflow's "Install kcli" step).
hosts:
  - name: ci-ai-node-3
    bmc_url: http://192.168.122.1:9000/redfish/v1/Systems/local/ci-ai-node-3
14 changes: 7 additions & 7 deletions .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ jobs:
- name: Deploy ai cluster
run: aicli create deployment --pf $PARAMFILE -P pull_secret=$PULLSECRET $CLUSTER --force

clean-up:
needs: deploy-ai-cluster
runs-on: libvirt
steps:
- name: Clean everything after success
if: always()
run: kcli -C local delete plan --yes $CLUSTER
# clean-up:
# needs: deploy-ai-cluster
# runs-on: libvirt
# steps:
# - name: Clean everything after success
# if: always()
# run: kcli -C local delete plan --yes $CLUSTER
49 changes: 49 additions & 0 deletions .github/workflows/nightly_scale.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Nightly CI workflow: deploys extra kcli VMs and scales the existing
# assisted-installer cluster onto them via `aicli scale deployment`.
name: nightly-ci-scale

on:
  schedule:
    # Daily at 02:21 UTC.
    - cron: '21 2 * * *'

env:
  HOME: /root
  # Quoted so the env value is the literal string "true", not a YAML boolean.
  PYTHONUNBUFFERED: "true"
  CLUSTER: ci-ai
  # Base node count; the kcli plan below provisions NODES + 2 VMs.
  NODES: "3"
  PULLSECRET: /root/openshift_pull.json
  PARAMFILE: .github/scale_parameters.yml
  PLANFILE: .github/kcli_plan.yml
  AI_OFFLINETOKEN: ${{ secrets.AI_OFFLINETOKEN }}

jobs:
  requirements:
    runs-on: libvirt
    steps:
      - uses: actions/checkout@v2
      - run: git pull origin ${GITHUB_REF##*/}
      - name: Install kcli
        run: |
          curl https://raw.githubusercontent.com/karmab/kcli/master/install.sh | bash
          kcli create sushy-service
      - name: Install aicli
        run: |
          pip3 install -U assisted-service-client
          python3 setup.py install
      - name: Clean up
        continue-on-error: true
        run: |
          kcli delete iso --yes full.iso || true

  scale-kcli-plan:
    needs: requirements
    runs-on: libvirt
    steps:
      - name: Deploy kcli plan
        # NODES + 2 accounts for the additional scale node(s) on top of the base plan.
        run: kcli -C local create plan -f $PLANFILE -P nodes=$(($NODES + 2)) $CLUSTER

  scale-ai-cluster:
    needs: scale-kcli-plan
    runs-on: libvirt
    timeout-minutes: 30
    steps:
      - name: Deploy ai cluster
        run: aicli scale deployment --pf $PARAMFILE -P pull_secret=$PULLSECRET $CLUSTER
77 changes: 77 additions & 0 deletions .github/workflows/scale_cluster.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Manually-triggered workflow: scale an existing assisted-installer cluster.
# Mirrors nightly_scale.yml but takes its parameters from workflow_dispatch inputs.
name: scale-cluster

on:
  workflow_dispatch:
    inputs:
      CLUSTER:
        description: 'Cluster Name'
        required: true
        default: ci-ai
      NODES:
        description: 'Nodes'
        required: true
        # workflow_dispatch inputs are strings; quote so YAML doesn't read an int.
        default: "3"
      PULLSECRET:
        description: 'Pull Secret'
        required: false
        default: /root/openshift_pull.json
      PARAMFILE:
        description: 'paramfile'
        required: false
        default: .github/scale_parameters.yml
      PLANFILE:
        description: 'planfile'
        required: false
        default: .github/kcli_plan.yml
      EXTRAPARAMS:
        description: 'Extra params'
        default: ''

env:
  HOME: /root
  # Quoted so the env value is the literal string "true", not a YAML boolean.
  PYTHONUNBUFFERED: "true"
  CLUSTER: ${{ github.event.inputs.CLUSTER }}
  NODES: ${{ github.event.inputs.NODES }}
  PULLSECRET: ${{ github.event.inputs.PULLSECRET }}
  PARAMFILE: ${{ github.event.inputs.PARAMFILE }}
  PLANFILE: ${{ github.event.inputs.PLANFILE }}
  EXTRAPARAMS: ${{ github.event.inputs.EXTRAPARAMS }}
  AI_OFFLINETOKEN: ${{ secrets.AI_OFFLINETOKEN }}
  # CLUSTER: ai-ci
  # PULLSECRET: /root/openshift_pull.json
  # PARAMFILE: .github/aicli_parameters.yml

jobs:
  requirements:
    runs-on: libvirt
    steps:
      - uses: actions/checkout@v2
      - run: git pull origin ${GITHUB_REF##*/}
      - name: Install kcli
        run: |
          curl https://raw.githubusercontent.com/karmab/kcli/master/install.sh | bash
          kcli create sushy-service
      - name: Install aicli
        run: |
          pip3 install -U assisted-service-client
          python3 setup.py install
      - name: Clean up
        continue-on-error: true
        run: |
          kcli delete iso --yes full.iso || true

  scale-kcli-plan:
    needs: requirements
    runs-on: libvirt
    steps:
      - name: Scale kcli plan
        # NODES + 2 accounts for the additional scale node(s) on top of the base plan.
        run: kcli -C local create plan -f $PLANFILE -P nodes=$(($NODES + 2)) $CLUSTER

  scale-ai-cluster:
    needs: scale-kcli-plan
    runs-on: libvirt
    timeout-minutes: 30
    steps:
      - name: Scale ai cluster
        run: |
          aicli scale deployment --pf $PARAMFILE -P pull_secret=$PULLSECRET $EXTRAPARAMS $CLUSTER
22 changes: 13 additions & 9 deletions ailib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1013,7 +1013,7 @@ def update_host(self, hostname, overrides):
elif not bind_updated and not extra_args_updated and not ignition_updated:
warning("Nothing updated for this host")

def wait_hosts(self, name, number=3, filter_installed=False, require_inventory=False):
def wait_hosts(self, name, number=3, filter_installed=False, require_inventory=False, filter_insufficient=False):
client = self.client
self.refresh_token(self.token, self.offlinetoken)
infra_env_id = self.get_infra_env_id(name)
Expand All @@ -1022,13 +1022,16 @@ def wait_hosts(self, name, number=3, filter_installed=False, require_inventory=F
if cluster_id is not None and client.v2_get_cluster(cluster_id=cluster_id).high_availability_mode == 'None':
number = 1
info(f"Waiting for hosts to reach expected number {number}", quiet=self.quiet)
installed = ['installed', 'added-to-existing-cluster']
while True:
try:
current_hosts = client.v2_list_hosts(infra_env_id=infra_env_id)
if require_inventory:
current_hosts = [h for h in current_hosts if 'inventory' in h]
if filter_installed:
current_hosts = [h for h in current_hosts if h['status'] != 'installed']
current_hosts = [h for h in current_hosts if h['status'] not in installed]
if filter_insufficient:
current_hosts = [h for h in current_hosts if h['status'] not in ['insufficient', 'discovering']]
if len(current_hosts) >= number:
return
else:
Expand Down Expand Up @@ -1582,11 +1585,11 @@ def create_fake_host(self, name, cluster, secret_key, overrides={}):
self.client.api_client.default_headers['X-Secret-Key'] = secret_key
self.client.v2_register_host(infra_env_id=infra_env_id, new_host_params=new_host_params)

def scale_deployment(self, cluster, overrides, force=False, debug=False):
def scale_deployment(self, cluster, overrides, debug=False):
infraenv = f"{cluster}_infra-env"
minimal = overrides.get('minimal', False)
if cluster.endswith('-day2'):
self.create_cluster(cluster, overrides.copy(), force=force)
infraenv = f"{cluster}_infra-env"
minimal = overrides.get('minimal', False)
self.create_cluster(cluster, overrides.copy())
overrides['cluster'] = cluster
self.create_infra_env(infraenv, overrides)
del overrides['cluster']
Expand Down Expand Up @@ -1616,13 +1619,14 @@ def scale_deployment(self, cluster, overrides, force=False, debug=False):
call(download_iso_cmd, shell=True)
hosts_number = len(overrides.get('hosts', [0, 0]))
info(f"Setting hosts_number to {hosts_number}")
if 'hosts' not in overrides:
if 'hosts' in overrides:
boot_overrides = overrides.copy()
boot_overrides['cluster'] = cluster
boot_result = boot_hosts(boot_overrides, debug=debug)
if boot_result != 0:
return {'result': 'failure', 'reason': 'Hit issue when booting hosts'}
self.wait_hosts(infraenv, hosts_number, filter_installed=True, require_inventory=True)
hosts = [h['requested_hostname'] for h in self.list_hosts() if h['status'] != 'installed']
self.wait_hosts(infraenv, hosts_number, filter_installed=True, require_inventory=True, filter_insufficient=True)
installed = ['installed', 'added-to-existing-cluster']
hosts = [h['requested_hostname'] for h in self.list_hosts() if h['status'] not in installed]
self.start_hosts(hosts)
return {'result': 'success'}
2 changes: 1 addition & 1 deletion ailib/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def scale_deployment(args):
overrides = handle_parameters(args.param, args.paramfile)
ai = AssistedClient(args.url, token=args.token, offlinetoken=args.offlinetoken, debug=args.debug,
ca=args.ca, cert=args.cert, key=args.key)
ai.scale_deployment(args.cluster, overrides, force=args.force, debug=args.debugredfish)
ai.scale_deployment(args.cluster, overrides, debug=args.debugredfish)


def create_manifests(args):
Expand Down

0 comments on commit 9db42c9

Please sign in to comment.