From 9db42c960e003537868430137fdd2c132d900257 Mon Sep 17 00:00:00 2001 From: karmab Date: Wed, 22 May 2024 16:35:50 +0200 Subject: [PATCH] scale deployment fixes --- .github/scale_parameters.yml | 5 ++ .github/workflows/nightly.yml | 14 +++--- .github/workflows/nightly_scale.yml | 49 ++++++++++++++++++ .github/workflows/scale_cluster.yml | 77 +++++++++++++++++++++++++++++ ailib/__init__.py | 22 +++++---- ailib/cli.py | 2 +- 6 files changed, 152 insertions(+), 17 deletions(-) create mode 100644 .github/scale_parameters.yml create mode 100644 .github/workflows/nightly_scale.yml create mode 100644 .github/workflows/scale_cluster.yml diff --git a/.github/scale_parameters.yml b/.github/scale_parameters.yml new file mode 100644 index 0000000..8fc0685 --- /dev/null +++ b/.github/scale_parameters.yml @@ -0,0 +1,5 @@ +bmc_user: admin +bmc_password: password +hosts: +- name: ci-ai-node-3 + bmc_url: http://192.168.122.1:9000/redfish/v1/Systems/local/ci-ai-node-3 diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index e6df97e..eed1226 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -49,10 +49,10 @@ jobs: - name: Deploy ai cluster run: aicli create deployment --pf $PARAMFILE -P pull_secret=$PULLSECRET $CLUSTER --force - clean-up: - needs: deploy-ai-cluster - runs-on: libvirt - steps: - - name: Clean everything after success - if: always() - run: kcli -C local delete plan --yes $CLUSTER +# clean-up: +# needs: deploy-ai-cluster +# runs-on: libvirt +# steps: +# - name: Clean everything after success +# if: always() +# run: kcli -C local delete plan --yes $CLUSTER diff --git a/.github/workflows/nightly_scale.yml b/.github/workflows/nightly_scale.yml new file mode 100644 index 0000000..69cf498 --- /dev/null +++ b/.github/workflows/nightly_scale.yml @@ -0,0 +1,49 @@ +name: nightly-ci-scale + +on: + schedule: + - cron: '21 2 * * *' + +env: + HOME: /root + PYTHONUNBUFFERED: true + CLUSTER: ci-ai + NODES: 3 + PULLSECRET: /root/openshift_pull.json + PARAMFILE: .github/scale_parameters.yml + PLANFILE: .github/kcli_plan.yml + AI_OFFLINETOKEN: ${{ secrets.AI_OFFLINETOKEN }} + +jobs: + requirements: + runs-on: libvirt + steps: + - uses: actions/checkout@v2 + - run: git pull origin ${GITHUB_REF##*/} + - name: Install kcli + run: | + curl https://raw.githubusercontent.com/karmab/kcli/master/install.sh | bash + kcli create sushy-service + - name: Install aicli + run: | + pip3 install -U assisted-service-client + python3 setup.py install + - name: Clean up + continue-on-error: true + run: | + kcli delete iso --yes full.iso || true + + scale-kcli-plan: + needs: requirements + runs-on: libvirt + steps: + - name: Deploy kcli plan + run: kcli -C local create plan -f $PLANFILE -P nodes=$(($NODES +2 )) $CLUSTER + + scale-ai-cluster: + needs: scale-kcli-plan + runs-on: libvirt + timeout-minutes: 30 + steps: + - name: Deploy ai cluster + run: aicli scale deployment --pf $PARAMFILE -P pull_secret=$PULLSECRET $CLUSTER diff --git a/.github/workflows/scale_cluster.yml b/.github/workflows/scale_cluster.yml new file mode 100644 index 0000000..ca1c153 --- /dev/null +++ b/.github/workflows/scale_cluster.yml @@ -0,0 +1,77 @@ +name: scale-cluster + +on: + workflow_dispatch: + inputs: + CLUSTER: + description: 'Cluster Name' + required: true + default: ci-ai + NODES: + description: 'Nodes' + required: true + default: 3 + PULLSECRET: + description: 'Pull Secret' + required: false + default: /root/openshift_pull.json + PARAMFILE: + description: 'paramfile' + required: false + default: .github/scale_parameters.yml + PLANFILE: + description: 'planfile' + required: false + default: .github/kcli_plan.yml + EXTRAPARAMS: + description: 'Extra params' + default: '' + +env: + HOME: /root + PYTHONUNBUFFERED: true + CLUSTER: ${{github.event.inputs.CLUSTER}} + NODES: ${{github.event.inputs.NODES}} + PULLSECRET: ${{github.event.inputs.PULLSECRET}} + PARAMFILE: ${{github.event.inputs.PARAMFILE}} + PLANFILE: ${{github.event.inputs.PLANFILE}} + EXTRAPARAMS: ${{github.event.inputs.EXTRAPARAMS}} + AI_OFFLINETOKEN: ${{ secrets.AI_OFFLINETOKEN }} +# CLUSTER: ai-ci +# PULLSECRET: /root/openshift_pull.json +# PARAMFILE: .github/aicli_parameters.yml + +jobs: + requirements: + runs-on: libvirt + steps: + - uses: actions/checkout@v2 + - run: git pull origin ${GITHUB_REF##*/} + - name: Install kcli + run: | + curl https://raw.githubusercontent.com/karmab/kcli/master/install.sh | bash + kcli create sushy-service + - name: Install aicli + run: | + pip3 install -U assisted-service-client + python3 setup.py install + - name: Clean up + continue-on-error: true + run: | + kcli delete iso --yes full.iso || true + + scale-kcli-plan: + needs: requirements + runs-on: libvirt + steps: + - name: Scale kcli plan + run: kcli -C local create plan -f $PLANFILE -P nodes=$(($NODES +2)) $CLUSTER + + scale-ai-cluster: + needs: scale-kcli-plan + runs-on: libvirt + timeout-minutes: 30 + steps: + - name: Scale ai cluster + run: | + aicli scale deployment --pf $PARAMFILE -P pull_secret=$PULLSECRET $EXTRAPARAMS $CLUSTER diff --git a/ailib/__init__.py b/ailib/__init__.py index ac7b417..088ee46 100644 --- a/ailib/__init__.py +++ b/ailib/__init__.py @@ -1013,7 +1013,7 @@ def update_host(self, hostname, overrides): elif not bind_updated and not extra_args_updated and not ignition_updated: warning("Nothing updated for this host") - def wait_hosts(self, name, number=3, filter_installed=False, require_inventory=False): + def wait_hosts(self, name, number=3, filter_installed=False, require_inventory=False, filter_insufficient=False): client = self.client self.refresh_token(self.token, self.offlinetoken) infra_env_id = self.get_infra_env_id(name) @@ -1022,13 +1022,16 @@ def wait_hosts(self, name, number=3, filter_installed=False, require_inventory=F if cluster_id is not None and client.v2_get_cluster(cluster_id=cluster_id).high_availability_mode == 'None': number = 1 info(f"Waiting for hosts to reach expected number {number}", quiet=self.quiet) + installed = ['installed', 'added-to-existing-cluster'] while True: try: current_hosts = client.v2_list_hosts(infra_env_id=infra_env_id) if require_inventory: current_hosts = [h for h in current_hosts if 'inventory' in h] if filter_installed: - current_hosts = [h for h in current_hosts if h['status'] != 'installed'] + current_hosts = [h for h in current_hosts if h['status'] not in installed] + if filter_insufficient: + current_hosts = [h for h in current_hosts if h['status'] not in ['insufficient', 'discovering']] if len(current_hosts) >= number: return else: @@ -1582,11 +1585,11 @@ def create_fake_host(self, name, cluster, secret_key, overrides={}): self.client.api_client.default_headers['X-Secret-Key'] = secret_key self.client.v2_register_host(infra_env_id=infra_env_id, new_host_params=new_host_params) - def scale_deployment(self, cluster, overrides, force=False, debug=False): + def scale_deployment(self, cluster, overrides, debug=False): + infraenv = f"{cluster}_infra-env" + minimal = overrides.get('minimal', False) if cluster.endswith('-day2'): - self.create_cluster(cluster, overrides.copy(), force=force) - infraenv = f"{cluster}_infra-env" - minimal = overrides.get('minimal', False) + self.create_cluster(cluster, overrides.copy()) overrides['cluster'] = cluster self.create_infra_env(infraenv, overrides) del overrides['cluster'] @@ -1616,13 +1619,14 @@ def scale_deployment(self, cluster, overrides, force=False, debug=False): call(download_iso_cmd, shell=True) hosts_number = len(overrides.get('hosts', [0, 0])) info(f"Setting hosts_number to {hosts_number}") - if 'hosts' not in overrides: + if 'hosts' in overrides: boot_overrides = overrides.copy() boot_overrides['cluster'] = cluster boot_result = boot_hosts(boot_overrides, debug=debug) if boot_result != 0: return {'result': 'failure', 'reason': 'Hit issue when booting hosts'} - self.wait_hosts(infraenv, hosts_number, filter_installed=True, require_inventory=True) - hosts = [h['requested_hostname'] for h in self.list_hosts() if h['status'] != 'installed'] + self.wait_hosts(infraenv, hosts_number, filter_installed=True, require_inventory=True, filter_insufficient=True) + installed = ['installed', 'added-to-existing-cluster'] + hosts = [h['requested_hostname'] for h in self.list_hosts() if h['status'] not in installed] self.start_hosts(hosts) return {'result': 'success'} diff --git a/ailib/cli.py b/ailib/cli.py index b7f85b4..0cdecaa 100644 --- a/ailib/cli.py +++ b/ailib/cli.py @@ -226,7 +226,7 @@ def scale_deployment(args): overrides = handle_parameters(args.param, args.paramfile) ai = AssistedClient(args.url, token=args.token, offlinetoken=args.offlinetoken, debug=args.debug, ca=args.ca, cert=args.cert, key=args.key) - ai.scale_deployment(args.cluster, overrides, force=args.force, debug=args.debugredfish) + ai.scale_deployment(args.cluster, overrides, debug=args.debugredfish) def create_manifests(args):