From e50c9413621b5a890b8b051ab756c8e5bd30acbe Mon Sep 17 00:00:00 2001 From: Cristovao Cordeiro Date: Tue, 12 Dec 2023 16:05:25 +0100 Subject: [PATCH] feat: microk8s cluster for the WE Changing the Workflow Engine setup such that we now use a self-served K8s cluster in our OpenStack environment. --- src/workflow-engine/.terraform.lock.hcl | 100 --------------- src/workflow-engine/README.md | 145 ++++++++-------------- src/workflow-engine/cloud-init.yml | 31 ----- src/workflow-engine/cluster.tf | 155 ------------------------ src/workflow-engine/variables.tf | 4 - 5 files changed, 51 insertions(+), 384 deletions(-) delete mode 100644 src/workflow-engine/.terraform.lock.hcl delete mode 100644 src/workflow-engine/cloud-init.yml delete mode 100644 src/workflow-engine/cluster.tf delete mode 100644 src/workflow-engine/variables.tf diff --git a/src/workflow-engine/.terraform.lock.hcl b/src/workflow-engine/.terraform.lock.hcl deleted file mode 100644 index 984e6e7f..00000000 --- a/src/workflow-engine/.terraform.lock.hcl +++ /dev/null @@ -1,100 +0,0 @@ -# This file is maintained automatically by "terraform init". -# Manual edits may be lost in future updates. 
- -provider "registry.terraform.io/hashicorp/cloudinit" { - version = "2.3.2" - hashes = [ - "h1:Vl0aixAYTV/bjathX7VArC5TVNkxBCsi3Vq7R4z1uvc=", - "zh:2487e498736ed90f53de8f66fe2b8c05665b9f8ff1506f751c5ee227c7f457d1", - "zh:3d8627d142942336cf65eea6eb6403692f47e9072ff3fa11c3f774a3b93130b3", - "zh:434b643054aeafb5df28d5529b72acc20c6f5ded24decad73b98657af2b53f4f", - "zh:436aa6c2b07d82aa6a9dd746a3e3a627f72787c27c80552ceda6dc52d01f4b6f", - "zh:458274c5aabe65ef4dbd61d43ce759287788e35a2da004e796373f88edcaa422", - "zh:54bc70fa6fb7da33292ae4d9ceef5398d637c7373e729ed4fce59bd7b8d67372", - "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:893ba267e18749c1a956b69be569f0d7bc043a49c3a0eb4d0d09a8e8b2ca3136", - "zh:95493b7517bce116f75cdd4c63b7c82a9d0d48ec2ef2f5eb836d262ef96d0aa7", - "zh:9ae21ab393be52e3e84e5cce0ef20e690d21f6c10ade7d9d9d22b39851bfeddc", - "zh:cc3b01ac2472e6d59358d54d5e4945032efbc8008739a6d4946ca1b621a16040", - "zh:f23bfe9758f06a1ec10ea3a81c9deedf3a7b42963568997d84a5153f35c5839a", - ] -} - -provider "registry.terraform.io/hashicorp/null" { - version = "3.2.1" - hashes = [ - "h1:FbGfc+muBsC17Ohy5g806iuI1hQc4SIexpYCrQHQd8w=", - "zh:58ed64389620cc7b82f01332e27723856422820cfd302e304b5f6c3436fb9840", - "zh:62a5cc82c3b2ddef7ef3a6f2fedb7b9b3deff4ab7b414938b08e51d6e8be87cb", - "zh:63cff4de03af983175a7e37e52d4bd89d990be256b16b5c7f919aff5ad485aa5", - "zh:74cb22c6700e48486b7cabefa10b33b801dfcab56f1a6ac9b6624531f3d36ea3", - "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:79e553aff77f1cfa9012a2218b8238dd672ea5e1b2924775ac9ac24d2a75c238", - "zh:a1e06ddda0b5ac48f7e7c7d59e1ab5a4073bbcf876c73c0299e4610ed53859dc", - "zh:c37a97090f1a82222925d45d84483b2aa702ef7ab66532af6cbcfb567818b970", - "zh:e4453fbebf90c53ca3323a92e7ca0f9961427d2f0ce0d2b65523cc04d5d999c2", - "zh:e80a746921946d8b6761e77305b752ad188da60688cfd2059322875d363be5f5", - "zh:fbdb892d9822ed0e4cb60f2fedbdbb556e4da0d88d3b942ae963ed6ff091e48f", - 
"zh:fca01a623d90d0cad0843102f9b8b9fe0d3ff8244593bd817f126582b52dd694", - ] -} - -provider "registry.terraform.io/hashicorp/random" { - version = "3.5.1" - hashes = [ - "h1:VSnd9ZIPyfKHOObuQCaKfnjIHRtR7qTw19Rz8tJxm+k=", - "zh:04e3fbd610cb52c1017d282531364b9c53ef72b6bc533acb2a90671957324a64", - "zh:119197103301ebaf7efb91df8f0b6e0dd31e6ff943d231af35ee1831c599188d", - "zh:4d2b219d09abf3b1bb4df93d399ed156cadd61f44ad3baf5cf2954df2fba0831", - "zh:6130bdde527587bbe2dcaa7150363e96dbc5250ea20154176d82bc69df5d4ce3", - "zh:6cc326cd4000f724d3086ee05587e7710f032f94fc9af35e96a386a1c6f2214f", - "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:b6d88e1d28cf2dfa24e9fdcc3efc77adcdc1c3c3b5c7ce503a423efbdd6de57b", - "zh:ba74c592622ecbcef9dc2a4d81ed321c4e44cddf7da799faa324da9bf52a22b2", - "zh:c7c5cde98fe4ef1143bd1b3ec5dc04baf0d4cc3ca2c5c7d40d17c0e9b2076865", - "zh:dac4bad52c940cd0dfc27893507c1e92393846b024c5a9db159a93c534a3da03", - "zh:de8febe2a2acd9ac454b844a4106ed295ae9520ef54dc8ed2faf29f12716b602", - "zh:eab0d0495e7e711cca367f7d4df6e322e6c562fc52151ec931176115b83ed014", - ] -} - -provider "registry.terraform.io/hashicorp/tls" { - version = "4.0.4" - hashes = [ - "h1:pe9vq86dZZKCm+8k1RhzARwENslF3SXb9ErHbQfgjXU=", - "zh:23671ed83e1fcf79745534841e10291bbf34046b27d6e68a5d0aab77206f4a55", - "zh:45292421211ffd9e8e3eb3655677700e3c5047f71d8f7650d2ce30242335f848", - "zh:59fedb519f4433c0fdb1d58b27c210b27415fddd0cd73c5312530b4309c088be", - "zh:5a8eec2409a9ff7cd0758a9d818c74bcba92a240e6c5e54b99df68fff312bbd5", - "zh:5e6a4b39f3171f53292ab88058a59e64825f2b842760a4869e64dc1dc093d1fe", - "zh:810547d0bf9311d21c81cc306126d3547e7bd3f194fc295836acf164b9f8424e", - "zh:824a5f3617624243bed0259d7dd37d76017097dc3193dac669be342b90b2ab48", - "zh:9361ccc7048be5dcbc2fafe2d8216939765b3160bd52734f7a9fd917a39ecbd8", - "zh:aa02ea625aaf672e649296bce7580f62d724268189fe9ad7c1b36bb0fa12fa60", - "zh:c71b4cd40d6ec7815dfeefd57d88bc592c0c42f5e5858dcc88245d371b4b8b1e", - 
"zh:dabcd52f36b43d250a3d71ad7abfa07b5622c69068d989e60b79b2bb4f220316", - "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", - ] -} - -provider "registry.terraform.io/terraform-provider-openstack/openstack" { - version = "1.51.1" - constraints = "~> 1.51.1" - hashes = [ - "h1:4P50wG0wseJaQyZbm0MZxw+s15iDGvXUlTh2k6zAFpI=", - "zh:41d77007ed8440efc2b79ae61a6b3f26642cfde2fae5f0b8e2f6cfb5f3e14a0c", - "zh:48b4365fc6ae3af3d4477f30430f71e1d1d5d336266ab615850a3c22b7a87f04", - "zh:739b5191700929906438212a573d8176a76598833eb84fa38069214f1878bce5", - "zh:784f663d2d862f53331888a674ca6f4619362a0c6a7e6925dc0ef1e8df811ba5", - "zh:7b18a754a96af2cf550f00da93c8c82b645137711ae6079930e488ae739b7e01", - "zh:954c3af03b20590419b5b94ddf01921f1ff87040409448d7210d262276088866", - "zh:96a830ee17b0e94e2ff643d44d206b87c43cb27faacb0c5f92d42462009b4f21", - "zh:a05e45c078531c32665f105160e9e4adbed7162888985577a69f1839a4630af4", - "zh:b1da80683c8476baa31300f0afc0e85e925445a0f245a0da20686bd3046faf46", - "zh:b4cc4a09c599cb46831c72fade63605561e0b8cdb7875a8e48ebe6407d0885b3", - "zh:e77bdf336a9595bf5494a0f2e4f307671c3981124577c0f21fc4d8023ea6a554", - "zh:eb5a994c2bd8c0228e2f90911f46db8066595593393a603d4411b044c61b01da", - "zh:f2d7e10c9bedcd13707b41c151de0a68f3ace960fa72f245454b7ac6e358dc56", - "zh:ffb9c5e10bcfeefac1c55c3d9fe85ec3622b0c81cecf832941ca23272b39acf0", - ] -} diff --git a/src/workflow-engine/README.md b/src/workflow-engine/README.md index 2f5fb26f..19358301 100644 --- a/src/workflow-engine/README.md +++ b/src/workflow-engine/README.md @@ -23,105 +23,62 @@ will be deployed. 
### Prerequisites -- `terraform` (>=v1.5.5) -- this TF configuration is designed to use an internal OpenStack -infrastructure, so before proceeding, make sure you: - - connect to the VPN, - - forward all the necessary remote endpoints to your `localhost``, with: - - ```bash - ssh -L 5000:: -L 9696:: -L 8774:novaApiUrl>: @ - ``` - - this is needed because of Bastion, -- (optional) if you are managing an already existing deployment, **you need** -to request and copy the corresponding `terraform.tfstate` file into this -directory. NOTE: this file may contain sensitive information so please do not -commit or share it. - -### Deploy - -These Terraform configurations will take care of deploying the VMs and -configuring them with MicroK8s. The `cluster.tf` file has the OpenStack and -MicroK8s configurations, and it relies on `variables.tf` and `cloud-init.yml`. -The former has the secret connection variables that need to be set before -deploying the VMs, while the latter contains the Cloud-init recipes -for configuring said nodes. - - 1. set all the required Terraform variables: - - ```bash - # Find the values in LP - export TF_VAR_openstack_username= - export TF_VAR_openstack_password= - export TF_VAR_openstack_tenant_name= - export TF_VAR_openstack_region= - ``` +The team already has an OpenStack environment for this. So make sure you have +access to +[it](https://canonical-rocks-team-docs.readthedocs-hosted.com/en/latest/openstack_at_canonical.html#ps6-bos03). + +### Deploy a new MicroK8s cluster + +See the instructions +[here](https://rt.admin.canonical.com/Ticket/Display.html?id=161278#txn-3692567) +for deploying a brand new, self-managed, K8s cluster in an existing OpenStack +environment. + +NOTE: although the majority of this process is self-served, the infrastructure +can only be maintained by IS, which means we must **always open a request if +we need to alter the cluster (e.g. add/remove units)**. 
+ +#### Check the MicroK8s deployment + +Once deployed, you should be able to reproduce the following steps from [within +the OpenStack environment where the K8s cluster was +deployed](https://canonical-rocks-team-docs.readthedocs-hosted.com/en/latest/openstack_at_canonical.html#ps6-bos03): + +- check that there are VMs deployed and `ACTIVE` with `openstack server list` +- confirm that the MicroK8s Juju deployment is healthy with `juju status +microk8s`. You should see something like: - 2. prepare the working directory with `terraform init` - 3. validate the configuration with `terraform validate` - 4. deploy with `terraform apply` - 5. save the resulting `terraform.tfstate` file to allow future management of - the deployed infrastructure. - -When performing any other Terraform command (e.g. `terraform destroy`), the -first 3 steps above are also required if not yet set. - -### Access - -If successful, you can validate the deployment by accessing the cluster. To do -that, you need the provisioned VMs' IPs: - -```bash -terraform show -json | \ - jq '.values.root_module.resources[] | - select(.type | contains("openstack_compute_instance_v2")) | - .values.network[].fixed_ip_v4' -``` - -If you just want the IP of the MicroK8s control plane, then: - -```bash -terraform show -json | \ - jq '.values.root_module.resources[] | - select(.name | contains("rocks-temporal-workers-controller")) | - .values.network[].fixed_ip_v4' -``` - -You can then SSH (via `ssh` or `openstack ssh`) into either one of the VMs, -with the `ubuntu` user. The VMs have 2 authorized SSH keys: - - 1. a default `rocks-team` key, which is already in place and can be used from -within the ROCKS environment in Scalingstack, and - 2. 
a key that is dynamically generated by Terraform at deployment time, and -that can be retrieved via - - ```bash - terraform show -json | \ - jq '.values.root_module.resources[] | - select(.type | contains("tls_private_key")) | - .values.private_key_pem' ``` + Model Controller Cloud/Region Version SLA Timestamp + model-name prodstack-controller cloud/region 3.1.6 unsupported 08:18:04Z + + App Version Status Scale Charm Channel Rev Exposed Message + microk8s 1.28.3 active 3 microk8s 1.28/stable 213 yes node is ready -Here's a list of useful operations that can be used to inspect the state of the -cluster, once inside the VMs + Unit Workload Agent Machine Public address Ports Message + microk8s/3* active idle 3 1.1.1.1 80/tcp,443/tcp,16443/tcp node is ready + microk8s/4 active idle 4 1.1.1.2 80/tcp,443/tcp,16443/tcp node is ready + microk8s/5 active idle 5 1.1.1.3 80/tcp,443/tcp,16443/tcp node is ready -- `microk8s status`: to ensure `microk8s` is up and running; -- `kubectl get no`: to ensure the `kubectl` alias is in place and that the -cluster has been properly formed; -- `cat /var/log/cloud-init*`: to inspect the Cloud-init script execution; -- `kubectl get po -A`: to double check that all pods are running, and if not -then `kubectl describe po -A` will dump their information and possibly the -reason why they are failing. + Machine State Address Inst id Series AZ Message + 3 started 1.1.1.1 59708835-f5aa-46b7-90a2-95221d8b13db jammy availability-zone-3 ACTIVE + 4 started 1.1.1.2 855ffa20-6e15-4218-9e85-c494aa5fbbe8 jammy availability-zone-1 ACTIVE + 5 started 1.1.1.3 ef04939a-a312-4405-a9a3-8afcec7fc8c0 jammy availability-zone-2 ACTIVE + ``` - +- the cluster nodes should also be accessible via SSH: `juju ssh microk8s/3` or +`ssh ubuntu@<public-address>`. 
diff --git a/src/workflow-engine/cloud-init.yml b/src/workflow-engine/cloud-init.yml deleted file mode 100644 index ec34d0d7..00000000 --- a/src/workflow-engine/cloud-init.yml +++ /dev/null @@ -1,31 +0,0 @@ -#cloud-config - -# Based on the instructions from https://microk8s.io/docs/install-proxy -write_files: - - path: /etc/environment - content: | - HTTPS_PROXY=http://squid.internal:3128 - HTTP_PROXY=http://squid.internal:3128 - NO_PROXY=10.0.0.0/8,192.168.0.0/16,127.0.0.1,172.16.0.0/16 - https_proxy=http://squid.internal:3128 - http_proxy=http://squid.internal:3128 - no_proxy=10.0.0.0/8,192.168.0.0/16,127.0.0.1,172.16.0.0/16 - append: true - - path: /home/ubuntu/.ssh/authorized_keys - content: | - ${custom_ssh_pub_key} - append: true - -# Based on the instructions from -# https://juju.is/docs/juju/get-started-with-juju#heading--prepare-your-cloud -runcmd: - - set -ex - - snap install microk8s --channel 1.25-strict/stable - - usermod -a -G snap_microk8s ubuntu - - mkdir -p /home/.kube - - chown -f -R ubuntu /home/ubuntu - - microk8s start - - microk8s status - - microk8s enable hostpath-storage dns - - snap alias microk8s.kubectl kubectl - - ${clusterforming_cmd} diff --git a/src/workflow-engine/cluster.tf b/src/workflow-engine/cluster.tf deleted file mode 100644 index a5132a83..00000000 --- a/src/workflow-engine/cluster.tf +++ /dev/null @@ -1,155 +0,0 @@ -terraform { - required_version = ">= 1.5.5" - required_providers { - openstack = { - source = "terraform-provider-openstack/openstack" - version = "~> 1.51.1" - } - } -} - -# Configure the OpenStack Provider -provider "openstack" { - user_name = var.openstack_username - password = var.openstack_password - tenant_name = var.openstack_tenant_name - region = var.openstack_region - # Forward the OS remote ports such that the auth_url, network and compute - # urls are all reachable on localhost. 
- auth_url = "http://localhost:5000/v2.0/" - endpoint_overrides = { - "network" = "http://localhost:9696/v2.0/" - "compute" = "http://localhost:8774/v2/" - } -} - -resource "random_id" "cluster_token" { - byte_length = 16 -} - -resource "tls_private_key" "controller_ssh_key" { - algorithm = "RSA" - rsa_bits = 4096 -} - -################## CONTROL PLANE - -data "cloudinit_config" "control_config" { - gzip = true - base64_encode = true - part { - content_type = "text/cloud-config" - content = templatefile("${path.module}/cloud-init.yml", { - clusterforming_cmd : "microk8s add-node --token ${random_id.cluster_token.hex}", - custom_ssh_pub_key : "${tls_private_key.controller_ssh_key.public_key_openssh}" - }) - } - - depends_on = [ - random_id.cluster_token, - tls_private_key.controller_ssh_key - ] -} - -resource "openstack_compute_instance_v2" "rocks-temporal-workers-controller" { - # rocks-temporal-workers-controller is the microk8s control plane - name = "rocks-temporal-workers-controller" - # The images are refreshed regularly, so the IDs might change. - # Double check the desired Jammy image ID before deploying. 
- image_id = "bfa876f1-456a-440f-ae52-ea49bf6e35f6" # jammy - flavor_id = "4" - key_pair = "rocks-team" - security_groups = ["default"] - user_data = data.cloudinit_config.control_config.rendered - - network { - name = "net_prod-rocks-test" - } -} - -################## WORKERNODE - -data "cloudinit_config" "workernode_config" { - gzip = true - base64_encode = true - part { - content_type = "text/cloud-config" - content = templatefile("${path.module}/cloud-init.yml", { - # The `microk8s join` command will be done with a remote-exec because - # we first need to make this the workernode's hostname is added to the - # controller's /etc/hosts (which is also being done in post, below) - clusterforming_cmd : "", - custom_ssh_pub_key : "${tls_private_key.controller_ssh_key.public_key_openssh}" - }) - } - - depends_on = [ - random_id.cluster_token, - openstack_compute_instance_v2.rocks-temporal-workers-controller - ] -} - -resource "openstack_compute_instance_v2" "rocks-temporal-workers-workernode" { - # rocks-temporal-workers-controller is the microk8s control plane - name = "rocks-temporal-workers-workernode" - image_id = "bfa876f1-456a-440f-ae52-ea49bf6e35f6" # jammy - flavor_id = "3" - key_pair = "rocks-team" - security_groups = ["default"] - user_data = data.cloudinit_config.workernode_config.rendered - - network { - name = "net_prod-rocks-test" - } -} - -################## POST ACTIONS - -# Add the workernode's IP to the controller's /etc/hosts, otherwise the join -# command with fail with: -# > Connection failed. The hostname ... 
does not resolve to the IP...(400) -resource "null_resource" "controller_etc_hosts" { - depends_on = [ - openstack_compute_instance_v2.rocks-temporal-workers-controller, - openstack_compute_instance_v2.rocks-temporal-workers-workernode, - tls_private_key.controller_ssh_key - ] - - connection { - type = "ssh" - user = "ubuntu" - private_key = tls_private_key.controller_ssh_key.private_key_pem - host = openstack_compute_instance_v2.rocks-temporal-workers-controller.network.0.fixed_ip_v4 - } - - provisioner "remote-exec" { - inline = [ - "echo '${openstack_compute_instance_v2.rocks-temporal-workers-workernode.network.0.fixed_ip_v4} ${openstack_compute_instance_v2.rocks-temporal-workers-workernode.name}' | sudo tee -a /etc/hosts", - ] - } -} - -# At the very end, join the workernode to the controller, forming a cluster. -resource "null_resource" "microk8s_join" { - depends_on = [ - openstack_compute_instance_v2.rocks-temporal-workers-controller, - openstack_compute_instance_v2.rocks-temporal-workers-workernode, - tls_private_key.controller_ssh_key, - null_resource.controller_etc_hosts - ] - - connection { - type = "ssh" - user = "ubuntu" - private_key = tls_private_key.controller_ssh_key.private_key_pem - host = openstack_compute_instance_v2.rocks-temporal-workers-workernode.network.0.fixed_ip_v4 - } - - provisioner "remote-exec" { - inline = [ - # Wait for cloud-init to finish so we have microk8s up and running - "cloud-init status --wait", - "sudo microk8s join ${openstack_compute_instance_v2.rocks-temporal-workers-controller.network.0.fixed_ip_v4}:25000/${random_id.cluster_token.hex} --worker" - ] - } -} diff --git a/src/workflow-engine/variables.tf b/src/workflow-engine/variables.tf deleted file mode 100644 index 1b7bd3cb..00000000 --- a/src/workflow-engine/variables.tf +++ /dev/null @@ -1,4 +0,0 @@ -variable "openstack_username" {} -variable "openstack_password" {} -variable "openstack_tenant_name" {} -variable "openstack_region" {}