From cd5193229326fefc7fdd5a92e6f656fab4f3fbd6 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 6 Dec 2024 13:46:42 +0300 Subject: [PATCH 001/161] 0.24.3 --- release | 2 +- releases | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/release b/release index 8b95abd94..63082344e 100644 --- a/release +++ b/release @@ -1 +1 @@ -0.24.2 +0.24.3 diff --git a/releases b/releases index b0e8711f3..f7ab5df6b 100644 --- a/releases +++ b/releases @@ -1,3 +1,4 @@ +0.24.2 0.24.1 0.24.0 0.23.7 From 6b1328291e154d785953b2118cd9abcdee220262 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 6 Dec 2024 13:50:01 +0300 Subject: [PATCH 002/161] env: manifests --- .../clickhouse-operator-install-ansible.yaml | 54 +++++++++---------- ...house-operator-install-bundle-v1beta1.yaml | 54 +++++++++---------- .../clickhouse-operator-install-bundle.yaml | 54 +++++++++---------- ...use-operator-install-template-v1beta1.yaml | 46 ++++++++-------- .../clickhouse-operator-install-template.yaml | 46 ++++++++-------- .../clickhouse-operator-install-tf.yaml | 54 +++++++++---------- deploy/operator/parts/crd.yaml | 14 ++--- 7 files changed, 161 insertions(+), 161 deletions(-) diff --git a/deploy/operator/clickhouse-operator-install-ansible.yaml b/deploy/operator/clickhouse-operator-install-ansible.yaml index 3d9bbd395..682b93235 100644 --- a/deploy/operator/clickhouse-operator-install-ansible.yaml +++ b/deploy/operator/clickhouse-operator-install-ansible.yaml @@ -11,14 +11,14 @@ # SINGULAR=clickhouseinstallation # PLURAL=clickhouseinstallations # SHORT=chi -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhouseinstallations.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -1291,14 +1291,14 @@ spec: # SINGULAR=clickhouseinstallationtemplate # PLURAL=clickhouseinstallationtemplates # SHORT=chit -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhouseinstallationtemplates.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -2574,7 +2574,7 @@ kind: CustomResourceDefinition metadata: name: clickhouseoperatorconfigurations.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -3023,14 +3023,14 @@ spec: --- # Template Parameters: # -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhousekeeperinstallations.clickhouse-keeper.altinity.com labels: - clickhouse-keeper.altinity.com/chop: 0.24.2 + clickhouse-keeper.altinity.com/chop: 0.24.3 spec: group: clickhouse-keeper.altinity.com scope: Namespaced @@ -3874,7 +3874,7 @@ metadata: name: clickhouse-operator namespace: {{ namespace }} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 --- # Template Parameters: # @@ -3900,7 +3900,7 @@ metadata: name: clickhouse-operator namespace: {{ namespace }} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 rules: # @@ -4119,7 +4119,7 @@ metadata: name: clickhouse-operator namespace: {{ namespace }} labels: - 
clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 roleRef: apiGroup: rbac.authorization.k8s.io kind: Role @@ -4141,7 +4141,7 @@ metadata: name: etc-clickhouse-operator-files namespace: {{ namespace }} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: config.yaml: | @@ -4564,7 +4564,7 @@ metadata: name: etc-clickhouse-operator-confd-files namespace: {{ namespace }} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4580,7 +4580,7 @@ metadata: name: etc-clickhouse-operator-configd-files namespace: {{ namespace }} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 01-clickhouse-01-listen.xml: | @@ -4679,7 +4679,7 @@ metadata: name: etc-clickhouse-operator-templatesd-files namespace: {{ namespace }} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 001-templates.json.example: | @@ -4779,7 +4779,7 @@ metadata: name: etc-clickhouse-operator-usersd-files namespace: {{ namespace }} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 01-clickhouse-operator-profile.xml: | @@ -4842,7 +4842,7 @@ metadata: name: etc-keeper-operator-confd-files namespace: {{ namespace }} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4858,7 +4858,7 @@ metadata: name: etc-keeper-operator-configd-files namespace: {{ namespace }} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 01-keeper-01-default-config.xml: | @@ -4936,7 +4936,7 @@ metadata: name: etc-keeper-operator-templatesd-files namespace: {{ namespace }} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: readme: | @@ -4954,7 +4954,7 @@ metadata: name: etc-keeper-operator-usersd-files namespace: {{ namespace }} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4962,7 +4962,7 @@ data: # Template parameters available: # NAMESPACE={{ namespace }} # COMMENT= -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # CH_USERNAME_SECRET_PLAIN=clickhouse_operator # CH_PASSWORD_SECRET_PLAIN={{ password }} # @@ -4972,7 +4972,7 @@ metadata: name: clickhouse-operator namespace: {{ namespace }} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator type: Opaque stringData: @@ -4983,9 +4983,9 @@ stringData: # # NAMESPACE={{ namespace }} # COMMENT= -# OPERATOR_IMAGE=altinity/clickhouse-operator:0.24.2 +# OPERATOR_IMAGE=altinity/clickhouse-operator:0.24.3 # OPERATOR_IMAGE_PULL_POLICY=Always -# METRICS_EXPORTER_IMAGE=altinity/metrics-exporter:0.24.2 +# METRICS_EXPORTER_IMAGE=altinity/metrics-exporter:0.24.3 # METRICS_EXPORTER_IMAGE_PULL_POLICY=Always # # Setup Deployment for clickhouse-operator @@ -4996,7 +4996,7 @@ metadata: name: clickhouse-operator namespace: {{ namespace }} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator spec: replicas: 1 @@ -5044,7 +5044,7 @@ spec: name: etc-keeper-operator-usersd-files containers: - name: clickhouse-operator - image: altinity/clickhouse-operator:0.24.2 + image: 
altinity/clickhouse-operator:0.24.3 imagePullPolicy: Always volumeMounts: - name: etc-clickhouse-operator-folder @@ -5120,7 +5120,7 @@ spec: name: metrics - name: metrics-exporter - image: altinity/metrics-exporter:0.24.2 + image: altinity/metrics-exporter:0.24.3 imagePullPolicy: Always volumeMounts: - name: etc-clickhouse-operator-folder @@ -5211,7 +5211,7 @@ metadata: name: clickhouse-operator-metrics namespace: {{ namespace }} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator spec: ports: diff --git a/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml b/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml index 12cf7fc2a..57af7a0c2 100644 --- a/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml +++ b/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml @@ -4,14 +4,14 @@ # SINGULAR=clickhouseinstallation # PLURAL=clickhouseinstallations # SHORT=chi -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition metadata: name: clickhouseinstallations.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -1275,14 +1275,14 @@ spec: # SINGULAR=clickhouseinstallationtemplate # PLURAL=clickhouseinstallationtemplates # SHORT=chit -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition metadata: name: clickhouseinstallationtemplates.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -2547,7 +2547,7 @@ kind: CustomResourceDefinition metadata: name: clickhouseoperatorconfigurations.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -2986,14 +2986,14 @@ spec: --- # Template Parameters: # -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhousekeeperinstallations.clickhouse-keeper.altinity.com labels: - clickhouse-keeper.altinity.com/chop: 0.24.2 + clickhouse-keeper.altinity.com/chop: 0.24.3 spec: group: clickhouse-keeper.altinity.com scope: Namespaced @@ -3834,7 +3834,7 @@ metadata: name: clickhouse-operator namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 # Template Parameters: # @@ -3859,7 +3859,7 @@ metadata: name: clickhouse-operator-kube-system #namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 rules: # # Core API group @@ -4068,7 +4068,7 @@ metadata: name: clickhouse-operator-kube-system #namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole @@ -4090,7 +4090,7 @@ metadata: name: etc-clickhouse-operator-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: config.yaml: | @@ -4512,7 +4512,7 @@ metadata: name: etc-clickhouse-operator-confd-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4528,7 
+4528,7 @@ metadata: name: etc-clickhouse-operator-configd-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 01-clickhouse-01-listen.xml: | @@ -4622,7 +4622,7 @@ metadata: name: etc-clickhouse-operator-templatesd-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 001-templates.json.example: | @@ -4720,7 +4720,7 @@ metadata: name: etc-clickhouse-operator-usersd-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 01-clickhouse-operator-profile.xml: | @@ -4782,7 +4782,7 @@ metadata: name: etc-keeper-operator-confd-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4798,7 +4798,7 @@ metadata: name: etc-keeper-operator-configd-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 01-keeper-01-default-config.xml: | @@ -4873,7 +4873,7 @@ metadata: name: etc-keeper-operator-templatesd-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: readme: | @@ -4891,7 +4891,7 @@ metadata: name: etc-keeper-operator-usersd-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4899,7 +4899,7 @@ data: # Template parameters available: # NAMESPACE=kube-system # COMMENT= -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # CH_USERNAME_SECRET_PLAIN=clickhouse_operator # CH_PASSWORD_SECRET_PLAIN=clickhouse_operator_password # @@ -4909,7 +4909,7 @@ metadata: name: clickhouse-operator namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator type: Opaque stringData: @@ -4920,9 +4920,9 @@ stringData: # # NAMESPACE=kube-system # COMMENT= -# OPERATOR_IMAGE=altinity/clickhouse-operator:0.24.2 +# OPERATOR_IMAGE=altinity/clickhouse-operator:0.24.3 # OPERATOR_IMAGE_PULL_POLICY=Always -# METRICS_EXPORTER_IMAGE=altinity/metrics-exporter:0.24.2 +# METRICS_EXPORTER_IMAGE=altinity/metrics-exporter:0.24.3 # METRICS_EXPORTER_IMAGE_PULL_POLICY=Always # # Setup Deployment for clickhouse-operator @@ -4933,7 +4933,7 @@ metadata: name: clickhouse-operator namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator spec: replicas: 1 @@ -4981,7 +4981,7 @@ spec: name: etc-keeper-operator-usersd-files containers: - name: clickhouse-operator - image: altinity/clickhouse-operator:0.24.2 + image: altinity/clickhouse-operator:0.24.3 imagePullPolicy: Always volumeMounts: - name: etc-clickhouse-operator-folder @@ -5055,7 +5055,7 @@ spec: - containerPort: 9999 name: metrics - name: metrics-exporter - image: altinity/metrics-exporter:0.24.2 + image: altinity/metrics-exporter:0.24.3 imagePullPolicy: Always volumeMounts: - name: etc-clickhouse-operator-folder @@ -5145,7 +5145,7 @@ metadata: name: clickhouse-operator-metrics namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator spec: ports: diff --git a/deploy/operator/clickhouse-operator-install-bundle.yaml 
b/deploy/operator/clickhouse-operator-install-bundle.yaml index cb8eb5278..80c563c4b 100644 --- a/deploy/operator/clickhouse-operator-install-bundle.yaml +++ b/deploy/operator/clickhouse-operator-install-bundle.yaml @@ -4,14 +4,14 @@ # SINGULAR=clickhouseinstallation # PLURAL=clickhouseinstallations # SHORT=chi -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhouseinstallations.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -1284,14 +1284,14 @@ spec: # SINGULAR=clickhouseinstallationtemplate # PLURAL=clickhouseinstallationtemplates # SHORT=chit -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhouseinstallationtemplates.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -2567,7 +2567,7 @@ kind: CustomResourceDefinition metadata: name: clickhouseoperatorconfigurations.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -3016,14 +3016,14 @@ spec: --- # Template Parameters: # -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhousekeeperinstallations.clickhouse-keeper.altinity.com labels: - clickhouse-keeper.altinity.com/chop: 0.24.2 + clickhouse-keeper.altinity.com/chop: 0.24.3 spec: group: clickhouse-keeper.altinity.com scope: Namespaced @@ -3867,7 +3867,7 @@ metadata: name: clickhouse-operator namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 --- # Template Parameters: # @@ -3893,7 +3893,7 @@ metadata: name: clickhouse-operator-kube-system #namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 rules: # @@ -4112,7 +4112,7 @@ metadata: name: clickhouse-operator-kube-system #namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole @@ -4134,7 +4134,7 @@ metadata: name: etc-clickhouse-operator-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: config.yaml: | @@ -4557,7 +4557,7 @@ metadata: name: etc-clickhouse-operator-confd-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4573,7 +4573,7 @@ metadata: name: etc-clickhouse-operator-configd-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 01-clickhouse-01-listen.xml: | @@ -4672,7 +4672,7 @@ metadata: name: etc-clickhouse-operator-templatesd-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 001-templates.json.example: | @@ -4772,7 +4772,7 @@ metadata: name: etc-clickhouse-operator-usersd-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: 
clickhouse-operator data: 01-clickhouse-operator-profile.xml: | @@ -4835,7 +4835,7 @@ metadata: name: etc-keeper-operator-confd-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4851,7 +4851,7 @@ metadata: name: etc-keeper-operator-configd-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 01-keeper-01-default-config.xml: | @@ -4929,7 +4929,7 @@ metadata: name: etc-keeper-operator-templatesd-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: readme: | @@ -4947,7 +4947,7 @@ metadata: name: etc-keeper-operator-usersd-files namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4955,7 +4955,7 @@ data: # Template parameters available: # NAMESPACE=kube-system # COMMENT= -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # CH_USERNAME_SECRET_PLAIN=clickhouse_operator # CH_PASSWORD_SECRET_PLAIN=clickhouse_operator_password # @@ -4965,7 +4965,7 @@ metadata: name: clickhouse-operator namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator type: Opaque stringData: @@ -4976,9 +4976,9 @@ stringData: # # NAMESPACE=kube-system # COMMENT= -# OPERATOR_IMAGE=altinity/clickhouse-operator:0.24.2 +# OPERATOR_IMAGE=altinity/clickhouse-operator:0.24.3 # OPERATOR_IMAGE_PULL_POLICY=Always -# METRICS_EXPORTER_IMAGE=altinity/metrics-exporter:0.24.2 +# METRICS_EXPORTER_IMAGE=altinity/metrics-exporter:0.24.3 # METRICS_EXPORTER_IMAGE_PULL_POLICY=Always # # Setup Deployment for clickhouse-operator @@ -4989,7 +4989,7 @@ metadata: name: clickhouse-operator namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator spec: replicas: 1 @@ -5037,7 +5037,7 @@ spec: name: etc-keeper-operator-usersd-files containers: - name: clickhouse-operator - image: altinity/clickhouse-operator:0.24.2 + image: altinity/clickhouse-operator:0.24.3 imagePullPolicy: Always volumeMounts: - name: etc-clickhouse-operator-folder @@ -5113,7 +5113,7 @@ spec: name: metrics - name: metrics-exporter - image: altinity/metrics-exporter:0.24.2 + image: altinity/metrics-exporter:0.24.3 imagePullPolicy: Always volumeMounts: - name: etc-clickhouse-operator-folder @@ -5204,7 +5204,7 @@ metadata: name: clickhouse-operator-metrics namespace: kube-system labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator spec: ports: diff --git a/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml b/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml index dacf28faf..bc7f69504 100644 --- a/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml +++ b/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml @@ -4,14 +4,14 @@ # SINGULAR=clickhouseinstallation # PLURAL=clickhouseinstallations # SHORT=chi -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition metadata: name: clickhouseinstallations.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -1275,14 
+1275,14 @@ spec: # SINGULAR=clickhouseinstallationtemplate # PLURAL=clickhouseinstallationtemplates # SHORT=chit -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition metadata: name: clickhouseinstallationtemplates.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -2547,7 +2547,7 @@ kind: CustomResourceDefinition metadata: name: clickhouseoperatorconfigurations.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -2986,14 +2986,14 @@ spec: --- # Template Parameters: # -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhousekeeperinstallations.clickhouse-keeper.altinity.com labels: - clickhouse-keeper.altinity.com/chop: 0.24.2 + clickhouse-keeper.altinity.com/chop: 0.24.3 spec: group: clickhouse-keeper.altinity.com scope: Namespaced @@ -3834,7 +3834,7 @@ metadata: name: clickhouse-operator namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 # Template Parameters: # @@ -3859,7 +3859,7 @@ metadata: name: clickhouse-operator-${OPERATOR_NAMESPACE} #namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 rules: # # Core API group @@ -4068,7 +4068,7 @@ metadata: name: clickhouse-operator-${OPERATOR_NAMESPACE} #namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole @@ -4090,7 +4090,7 @@ metadata: name: etc-clickhouse-operator-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: config.yaml: | @@ -4512,7 +4512,7 @@ metadata: name: etc-clickhouse-operator-confd-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4528,7 +4528,7 @@ metadata: name: etc-clickhouse-operator-configd-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 01-clickhouse-01-listen.xml: | @@ -4622,7 +4622,7 @@ metadata: name: etc-clickhouse-operator-templatesd-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 001-templates.json.example: | @@ -4720,7 +4720,7 @@ metadata: name: etc-clickhouse-operator-usersd-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 01-clickhouse-operator-profile.xml: | @@ -4782,7 +4782,7 @@ metadata: name: etc-keeper-operator-confd-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4798,7 +4798,7 @@ metadata: name: etc-keeper-operator-configd-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 01-keeper-01-default-config.xml: | 
@@ -4873,7 +4873,7 @@ metadata: name: etc-keeper-operator-templatesd-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: readme: | @@ -4891,7 +4891,7 @@ metadata: name: etc-keeper-operator-usersd-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4899,7 +4899,7 @@ data: # Template parameters available: # NAMESPACE=${OPERATOR_NAMESPACE} # COMMENT= -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # CH_USERNAME_SECRET_PLAIN=clickhouse_operator # CH_PASSWORD_SECRET_PLAIN=clickhouse_operator_password # @@ -4909,7 +4909,7 @@ metadata: name: clickhouse-operator namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator type: Opaque stringData: @@ -4933,7 +4933,7 @@ metadata: name: clickhouse-operator namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator spec: replicas: 1 @@ -5145,7 +5145,7 @@ metadata: name: clickhouse-operator-metrics namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator spec: ports: diff --git a/deploy/operator/clickhouse-operator-install-template.yaml b/deploy/operator/clickhouse-operator-install-template.yaml index e01f81dc4..fddc955ca 100644 --- a/deploy/operator/clickhouse-operator-install-template.yaml +++ b/deploy/operator/clickhouse-operator-install-template.yaml @@ -4,14 +4,14 @@ # SINGULAR=clickhouseinstallation # PLURAL=clickhouseinstallations # SHORT=chi -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhouseinstallations.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -1284,14 +1284,14 @@ spec: # SINGULAR=clickhouseinstallationtemplate # PLURAL=clickhouseinstallationtemplates # SHORT=chit -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhouseinstallationtemplates.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -2567,7 +2567,7 @@ kind: CustomResourceDefinition metadata: name: clickhouseoperatorconfigurations.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -3016,14 +3016,14 @@ spec: --- # Template Parameters: # -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhousekeeperinstallations.clickhouse-keeper.altinity.com labels: - clickhouse-keeper.altinity.com/chop: 0.24.2 + clickhouse-keeper.altinity.com/chop: 0.24.3 spec: group: clickhouse-keeper.altinity.com scope: Namespaced @@ -3867,7 +3867,7 @@ metadata: name: clickhouse-operator namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 --- # Template Parameters: # @@ -3893,7 +3893,7 @@ metadata: name: clickhouse-operator-${OPERATOR_NAMESPACE} 
#namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 rules: # @@ -4112,7 +4112,7 @@ metadata: name: clickhouse-operator-${OPERATOR_NAMESPACE} #namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole @@ -4134,7 +4134,7 @@ metadata: name: etc-clickhouse-operator-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: config.yaml: | @@ -4557,7 +4557,7 @@ metadata: name: etc-clickhouse-operator-confd-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4573,7 +4573,7 @@ metadata: name: etc-clickhouse-operator-configd-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 01-clickhouse-01-listen.xml: | @@ -4672,7 +4672,7 @@ metadata: name: etc-clickhouse-operator-templatesd-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 001-templates.json.example: | @@ -4772,7 +4772,7 @@ metadata: name: etc-clickhouse-operator-usersd-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 01-clickhouse-operator-profile.xml: | @@ -4835,7 +4835,7 @@ metadata: name: etc-keeper-operator-confd-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4851,7 +4851,7 @@ metadata: name: etc-keeper-operator-configd-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 01-keeper-01-default-config.xml: | @@ -4929,7 +4929,7 @@ metadata: name: etc-keeper-operator-templatesd-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: readme: | @@ -4947,7 +4947,7 @@ metadata: name: etc-keeper-operator-usersd-files namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4955,7 +4955,7 @@ data: # Template parameters available: # NAMESPACE=${OPERATOR_NAMESPACE} # COMMENT= -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # CH_USERNAME_SECRET_PLAIN=clickhouse_operator # CH_PASSWORD_SECRET_PLAIN=clickhouse_operator_password # @@ -4965,7 +4965,7 @@ metadata: name: clickhouse-operator namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator type: Opaque stringData: @@ -4989,7 +4989,7 @@ metadata: name: clickhouse-operator namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator spec: replicas: 1 @@ -5204,7 +5204,7 @@ metadata: name: clickhouse-operator-metrics namespace: ${OPERATOR_NAMESPACE} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator spec: ports: diff --git 
a/deploy/operator/clickhouse-operator-install-tf.yaml b/deploy/operator/clickhouse-operator-install-tf.yaml index ea63ed120..a017e5021 100644 --- a/deploy/operator/clickhouse-operator-install-tf.yaml +++ b/deploy/operator/clickhouse-operator-install-tf.yaml @@ -11,14 +11,14 @@ # SINGULAR=clickhouseinstallation # PLURAL=clickhouseinstallations # SHORT=chi -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhouseinstallations.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -1291,14 +1291,14 @@ spec: # SINGULAR=clickhouseinstallationtemplate # PLURAL=clickhouseinstallationtemplates # SHORT=chit -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhouseinstallationtemplates.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -2574,7 +2574,7 @@ kind: CustomResourceDefinition metadata: name: clickhouseoperatorconfigurations.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -3023,14 +3023,14 @@ spec: --- # Template Parameters: # -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhousekeeperinstallations.clickhouse-keeper.altinity.com labels: - clickhouse-keeper.altinity.com/chop: 0.24.2 + clickhouse-keeper.altinity.com/chop: 0.24.3 spec: group: clickhouse-keeper.altinity.com scope: Namespaced @@ -3874,7 +3874,7 @@ metadata: name: clickhouse-operator namespace: ${namespace} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 --- # Template Parameters: # @@ -3900,7 +3900,7 @@ metadata: name: clickhouse-operator namespace: ${namespace} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 rules: # @@ -4119,7 +4119,7 @@ metadata: name: clickhouse-operator namespace: ${namespace} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 roleRef: apiGroup: rbac.authorization.k8s.io kind: Role @@ -4141,7 +4141,7 @@ metadata: name: etc-clickhouse-operator-files namespace: ${namespace} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: config.yaml: | @@ -4564,7 +4564,7 @@ metadata: name: etc-clickhouse-operator-confd-files namespace: ${namespace} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4580,7 +4580,7 @@ metadata: name: etc-clickhouse-operator-configd-files namespace: ${namespace} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 01-clickhouse-01-listen.xml: | @@ -4679,7 +4679,7 @@ metadata: name: etc-clickhouse-operator-templatesd-files namespace: ${namespace} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 001-templates.json.example: | @@ -4779,7 +4779,7 @@ metadata: name: etc-clickhouse-operator-usersd-files namespace: ${namespace} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 
0.24.3 app: clickhouse-operator data: 01-clickhouse-operator-profile.xml: | @@ -4842,7 +4842,7 @@ metadata: name: etc-keeper-operator-confd-files namespace: ${namespace} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4858,7 +4858,7 @@ metadata: name: etc-keeper-operator-configd-files namespace: ${namespace} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: 01-keeper-01-default-config.xml: | @@ -4936,7 +4936,7 @@ metadata: name: etc-keeper-operator-templatesd-files namespace: ${namespace} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: readme: | @@ -4954,7 +4954,7 @@ metadata: name: etc-keeper-operator-usersd-files namespace: ${namespace} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator data: --- @@ -4962,7 +4962,7 @@ data: # Template parameters available: # NAMESPACE=${namespace} # COMMENT= -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # CH_USERNAME_SECRET_PLAIN=clickhouse_operator # CH_PASSWORD_SECRET_PLAIN=${password} # @@ -4972,7 +4972,7 @@ metadata: name: clickhouse-operator namespace: ${namespace} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator type: Opaque stringData: @@ -4983,9 +4983,9 @@ stringData: # # NAMESPACE=${namespace} # COMMENT= -# OPERATOR_IMAGE=altinity/clickhouse-operator:0.24.2 +# OPERATOR_IMAGE=altinity/clickhouse-operator:0.24.3 # OPERATOR_IMAGE_PULL_POLICY=Always -# METRICS_EXPORTER_IMAGE=altinity/metrics-exporter:0.24.2 +# METRICS_EXPORTER_IMAGE=altinity/metrics-exporter:0.24.3 # METRICS_EXPORTER_IMAGE_PULL_POLICY=Always # # Setup Deployment for clickhouse-operator @@ -4996,7 +4996,7 @@ metadata: name: clickhouse-operator namespace: ${namespace} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator spec: replicas: 1 @@ -5044,7 +5044,7 @@ spec: name: etc-keeper-operator-usersd-files containers: - name: clickhouse-operator - image: altinity/clickhouse-operator:0.24.2 + image: altinity/clickhouse-operator:0.24.3 imagePullPolicy: Always volumeMounts: - name: etc-clickhouse-operator-folder @@ -5120,7 +5120,7 @@ spec: name: metrics - name: metrics-exporter - image: altinity/metrics-exporter:0.24.2 + image: altinity/metrics-exporter:0.24.3 imagePullPolicy: Always volumeMounts: - name: etc-clickhouse-operator-folder @@ -5211,7 +5211,7 @@ metadata: name: clickhouse-operator-metrics namespace: ${namespace} labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 app: clickhouse-operator spec: ports: diff --git a/deploy/operator/parts/crd.yaml b/deploy/operator/parts/crd.yaml index bc671ec64..22481014c 100644 --- a/deploy/operator/parts/crd.yaml +++ b/deploy/operator/parts/crd.yaml @@ -4,14 +4,14 @@ # SINGULAR=clickhouseinstallation # PLURAL=clickhouseinstallations # SHORT=chi -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhouseinstallations.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -1909,14 +1909,14 @@ spec: # SINGULAR=clickhouseinstallationtemplate # PLURAL=clickhouseinstallationtemplates # SHORT=chit -# OPERATOR_VERSION=0.24.2 +# 
OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhouseinstallationtemplates.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -3817,7 +3817,7 @@ kind: CustomResourceDefinition metadata: name: clickhouseoperatorconfigurations.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced @@ -4435,14 +4435,14 @@ spec: --- # Template Parameters: # -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhousekeeperinstallations.clickhouse-keeper.altinity.com labels: - clickhouse-keeper.altinity.com/chop: 0.24.2 + clickhouse-keeper.altinity.com/chop: 0.24.3 spec: group: clickhouse-keeper.altinity.com scope: Namespaced From 88adf634290af5b354d96140e2cda8f7be1d316e Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 6 Dec 2024 13:50:15 +0300 Subject: [PATCH 003/161] env: helm chart --- deploy/helm/clickhouse-operator/Chart.yaml | 4 ++-- deploy/helm/clickhouse-operator/README.md | 2 +- ...ition-clickhouseinstallations.clickhouse.altinity.com.yaml | 4 ++-- ...ickhouseinstallationtemplates.clickhouse.altinity.com.yaml | 4 ++-- ...usekeeperinstallations.clickhouse-keeper.altinity.com.yaml | 4 ++-- ...ckhouseoperatorconfigurations.clickhouse.altinity.com.yaml | 2 +- .../templates/generated/Deployment-clickhouse-operator.yaml | 4 ++-- .../templates/generated/Secret-clickhouse-operator.yaml | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/deploy/helm/clickhouse-operator/Chart.yaml b/deploy/helm/clickhouse-operator/Chart.yaml index fef823dca..0373c0124 100644 --- a/deploy/helm/clickhouse-operator/Chart.yaml +++ b/deploy/helm/clickhouse-operator/Chart.yaml @@ -13,8 +13,8 @@ description: |- kubectl apply -f https://github.com/Altinity/clickhouse-operator/raw/master/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousekeeperinstallations.clickhouse-keeper.altinity.com.yaml ``` type: application -version: 0.24.2 -appVersion: 0.24.2 +version: 0.24.3 +appVersion: 0.24.3 home: https://github.com/Altinity/clickhouse-operator icon: https://logosandtypes.com/wp-content/uploads/2020/12/altinity.svg maintainers: diff --git a/deploy/helm/clickhouse-operator/README.md b/deploy/helm/clickhouse-operator/README.md index bbf7df4c6..01fcb5bf0 100644 --- a/deploy/helm/clickhouse-operator/README.md +++ b/deploy/helm/clickhouse-operator/README.md @@ -1,6 +1,6 @@ # altinity-clickhouse-operator -![Version: 0.24.2](https://img.shields.io/badge/Version-0.24.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.24.2](https://img.shields.io/badge/AppVersion-0.24.2-informational?style=flat-square) +![Version: 0.24.3](https://img.shields.io/badge/Version-0.24.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.24.3](https://img.shields.io/badge/AppVersion-0.24.3-informational?style=flat-square) Helm chart to deploy [altinity-clickhouse-operator](https://github.com/Altinity/clickhouse-operator). 
diff --git a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallations.clickhouse.altinity.com.yaml b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallations.clickhouse.altinity.com.yaml index e2f8e253d..f14e5fb20 100644 --- a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallations.clickhouse.altinity.com.yaml +++ b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallations.clickhouse.altinity.com.yaml @@ -4,14 +4,14 @@ # SINGULAR=clickhouseinstallation # PLURAL=clickhouseinstallations # SHORT=chi -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhouseinstallations.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced diff --git a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallationtemplates.clickhouse.altinity.com.yaml b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallationtemplates.clickhouse.altinity.com.yaml index 49866e868..a41cee7bc 100644 --- a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallationtemplates.clickhouse.altinity.com.yaml +++ b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallationtemplates.clickhouse.altinity.com.yaml @@ -4,14 +4,14 @@ # SINGULAR=clickhouseinstallationtemplate # PLURAL=clickhouseinstallationtemplates # SHORT=chit -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhouseinstallationtemplates.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced diff --git a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousekeeperinstallations.clickhouse-keeper.altinity.com.yaml b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousekeeperinstallations.clickhouse-keeper.altinity.com.yaml index 9bee334ca..8a6db55cc 100644 --- a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousekeeperinstallations.clickhouse-keeper.altinity.com.yaml +++ b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousekeeperinstallations.clickhouse-keeper.altinity.com.yaml @@ -1,13 +1,13 @@ # Template Parameters: # -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clickhousekeeperinstallations.clickhouse-keeper.altinity.com labels: - clickhouse-keeper.altinity.com/chop: 0.24.2 + clickhouse-keeper.altinity.com/chop: 0.24.3 spec: group: clickhouse-keeper.altinity.com scope: Namespaced diff --git a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseoperatorconfigurations.clickhouse.altinity.com.yaml b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseoperatorconfigurations.clickhouse.altinity.com.yaml index ea6f01dd5..46e5e8410 100644 --- a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseoperatorconfigurations.clickhouse.altinity.com.yaml +++ b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseoperatorconfigurations.clickhouse.altinity.com.yaml @@ -7,7 +7,7 @@ kind: CustomResourceDefinition metadata: name: 
clickhouseoperatorconfigurations.clickhouse.altinity.com labels: - clickhouse.altinity.com/chop: 0.24.2 + clickhouse.altinity.com/chop: 0.24.3 spec: group: clickhouse.altinity.com scope: Namespaced diff --git a/deploy/helm/clickhouse-operator/templates/generated/Deployment-clickhouse-operator.yaml b/deploy/helm/clickhouse-operator/templates/generated/Deployment-clickhouse-operator.yaml index 84661fd40..a87488dc8 100644 --- a/deploy/helm/clickhouse-operator/templates/generated/Deployment-clickhouse-operator.yaml +++ b/deploy/helm/clickhouse-operator/templates/generated/Deployment-clickhouse-operator.yaml @@ -2,9 +2,9 @@ # # NAMESPACE=kube-system # COMMENT= -# OPERATOR_IMAGE=altinity/clickhouse-operator:0.24.2 +# OPERATOR_IMAGE=altinity/clickhouse-operator:0.24.3 # OPERATOR_IMAGE_PULL_POLICY=Always -# METRICS_EXPORTER_IMAGE=altinity/metrics-exporter:0.24.2 +# METRICS_EXPORTER_IMAGE=altinity/metrics-exporter:0.24.3 # METRICS_EXPORTER_IMAGE_PULL_POLICY=Always # # Setup Deployment for clickhouse-operator diff --git a/deploy/helm/clickhouse-operator/templates/generated/Secret-clickhouse-operator.yaml b/deploy/helm/clickhouse-operator/templates/generated/Secret-clickhouse-operator.yaml index 664cf93d9..ffb590aa2 100644 --- a/deploy/helm/clickhouse-operator/templates/generated/Secret-clickhouse-operator.yaml +++ b/deploy/helm/clickhouse-operator/templates/generated/Secret-clickhouse-operator.yaml @@ -3,7 +3,7 @@ # Template parameters available: # NAMESPACE=kube-system # COMMENT= -# OPERATOR_VERSION=0.24.2 +# OPERATOR_VERSION=0.24.3 # CH_USERNAME_SECRET_PLAIN=clickhouse_operator # CH_PASSWORD_SECRET_PLAIN=clickhouse_operator_password # From b7ffd36f7fee7dbe818a493d2535450a38fd1129 Mon Sep 17 00:00:00 2001 From: Janek Lasocki-Biczysko Date: Fri, 6 Dec 2024 11:02:41 +0000 Subject: [PATCH 004/161] fix: release_chart workflow bugs Was missing an `id` on a step that was referenced by a latter step, and also missing `}}` closing parens --- .github/workflows/release_chart.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release_chart.yaml b/.github/workflows/release_chart.yaml index d2fe251aa..16333d685 100644 --- a/.github/workflows/release_chart.yaml +++ b/.github/workflows/release_chart.yaml @@ -27,6 +27,7 @@ jobs: run: cr package deploy/helm/clickhouse-operator - name: Get Release Assets + id: get_assets run: | CHART_PATH=$(ls .cr-release-packages/altinity-clickhouse-operator-*.tgz) ASSET_NAME=$(basename ${CHART_PATH}) @@ -41,7 +42,7 @@ jobs: if: steps.get_assets.outputs.asset_id != '' run: | curl -X DELETE -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ - "https://api.github.com/repos/${{ github.repository + "https://api.github.com/repos/${{ github.repository }} - name: Upload Release Artifacts run: | From 3387d75be38b868cedebbad368ffddba9ab15c91 Mon Sep 17 00:00:00 2001 From: Eugene Klimov Date: Fri, 6 Dec 2024 19:43:48 +0400 Subject: [PATCH 005/161] replace deprecated set-output to $GITHUB_OUTPUT (#1591) * backport https://github.com/Altinity/clickhouse-operator/pull/1585 to 0.24.1 Signed-off-by: Slach * backport https://github.com/Altinity/clickhouse-operator/pull/1585 to 0.24.1 (#1587) Signed-off-by: Slach Co-authored-by: Vladislav Klimenko * allow trigger helm release after edit releases, not only publish * allow trigger helm release after edit releases, not only publish (#1588) * allow trigger helm release after edit releases, not only publish (#1589) * allow trigger helm release after edit releases, not only publish (#1590) * replace 
set-output to $GITHUB_OUTPUT, look https://github.blog/changelog/2022-10-11-github-actions-deprecating-save-state-and-set-output-commands/ and https://github.com/Altinity/clickhouse-operator/actions/runs/12201273120 --------- Signed-off-by: Slach Co-authored-by: Vladislav Klimenko --- .github/workflows/release_chart.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release_chart.yaml b/.github/workflows/release_chart.yaml index 16333d685..1f5267e3c 100644 --- a/.github/workflows/release_chart.yaml +++ b/.github/workflows/release_chart.yaml @@ -2,7 +2,9 @@ name: release_chart on: release: - types: [ published ] + types: + - published + - edited jobs: release_chart: @@ -36,7 +38,7 @@ jobs: jq -r ".[] | select(.name == \"$ASSET_NAME\") | .id") echo "Asset ID is $ASSET_ID" - echo "::set-output name=asset_id::$ASSET_ID" + echo "asset_id=$ASSET_ID" >> $GITHUB_OUTPUT - name: Delete Existing Release Artifacts if: steps.get_assets.outputs.asset_id != '' From e8d69022938d9eaf4bcdd5d3b9bdff4215c30266 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 10 Dec 2024 15:30:46 +0300 Subject: [PATCH 006/161] dev: namespace func in cr --- pkg/apis/clickhouse-keeper.altinity.com/v1/type_chk.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/apis/clickhouse-keeper.altinity.com/v1/type_chk.go b/pkg/apis/clickhouse-keeper.altinity.com/v1/type_chk.go index 1a7dbfef1..8e65d03f7 100644 --- a/pkg/apis/clickhouse-keeper.altinity.com/v1/type_chk.go +++ b/pkg/apis/clickhouse-keeper.altinity.com/v1/type_chk.go @@ -659,3 +659,7 @@ func (cr *ClickHouseKeeperInstallation) IsZero() bool { func (cr *ClickHouseKeeperInstallation) IsNonZero() bool { return cr != nil } + +func (cr *ClickHouseKeeperInstallation) NamespaceName() (string, string) { + return util.NamespaceName(cr) +} From cef97948f1e443205dd9dfb49637e6f8cffcfec4 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 10 Dec 2024 15:31:09 +0300 Subject: [PATCH 007/161] dev: add namespace fn to the interface --- pkg/apis/clickhouse.altinity.com/v1/interface.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/apis/clickhouse.altinity.com/v1/interface.go b/pkg/apis/clickhouse.altinity.com/v1/interface.go index 09b6ad399..450efe45a 100644 --- a/pkg/apis/clickhouse.altinity.com/v1/interface.go +++ b/pkg/apis/clickhouse.altinity.com/v1/interface.go @@ -22,6 +22,8 @@ import ( type ICustomResource interface { meta.Object + NamespaceName() (string, string) + IsNonZero() bool IsZero() bool From 2926aa6c4c98dbef75a001e0ae0a01e691311d5d Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 10 Dec 2024 15:31:36 +0300 Subject: [PATCH 008/161] dev: chk status simplification --- .../v1/type_status.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pkg/apis/clickhouse-keeper.altinity.com/v1/type_status.go b/pkg/apis/clickhouse-keeper.altinity.com/v1/type_status.go index 16356e13d..82a4b479b 100644 --- a/pkg/apis/clickhouse-keeper.altinity.com/v1/type_status.go +++ b/pkg/apis/clickhouse-keeper.altinity.com/v1/type_status.go @@ -736,14 +736,14 @@ func (s *Status) GetEndpoint() string { // GetNormalizedCR gets target CR func (s *Status) GetNormalizedCR() *ClickHouseKeeperInstallation { - return getInstallationWithReadLock(s, func(s *Status) *ClickHouseKeeperInstallation { + return getCRWithReadLock(s, func(s *Status) *ClickHouseKeeperInstallation { return s.NormalizedCR }) } // GetNormalizedCRCompleted gets completed CR func (s *Status) GetNormalizedCRCompleted() 
*ClickHouseKeeperInstallation { - return getInstallationWithReadLock(s, func(s *Status) *ClickHouseKeeperInstallation { + return getCRWithReadLock(s, func(s *Status) *ClickHouseKeeperInstallation { return s.NormalizedCRCompleted }) } @@ -757,7 +757,7 @@ func (s *Status) GetHostsWithTablesCreated() []string { // Begin helpers -func doWithWriteLock(s *Status, f func(s *Status)) { +func doWithWriteLock(s *Status, f func(*Status)) { if s == nil { return } @@ -767,7 +767,7 @@ func doWithWriteLock(s *Status, f func(s *Status)) { f(s) } -func doWithReadLock(s *Status, f func(s *Status)) { +func doWithReadLock(s *Status, f func(*Status)) { if s == nil { return } @@ -777,7 +777,7 @@ func doWithReadLock(s *Status, f func(s *Status)) { f(s) } -func getIntWithReadLock(s *Status, f func(s *Status) int) int { +func getIntWithReadLock(s *Status, f func(*Status) int) int { var zeroVal int if s == nil { return zeroVal @@ -788,7 +788,7 @@ func getIntWithReadLock(s *Status, f func(s *Status) int) int { return f(s) } -func getStringWithReadLock(s *Status, f func(s *Status) string) string { +func getStringWithReadLock(s *Status, f func(*Status) string) string { var zeroVal string if s == nil { return zeroVal @@ -799,7 +799,7 @@ func getStringWithReadLock(s *Status, f func(s *Status) string) string { return f(s) } -func getInstallationWithReadLock(s *Status, f func(s *Status) *ClickHouseKeeperInstallation) *ClickHouseKeeperInstallation { +func getCRWithReadLock(s *Status, f func(*Status) *ClickHouseKeeperInstallation) *ClickHouseKeeperInstallation { var zeroVal *ClickHouseKeeperInstallation if s == nil { return zeroVal @@ -810,7 +810,7 @@ func getInstallationWithReadLock(s *Status, f func(s *Status) *ClickHouseKeeperI return f(s) } -func getStringArrWithReadLock(s *Status, f func(s *Status) []string) []string { +func getStringArrWithReadLock(s *Status, f func(*Status) []string) []string { emptyArr := make([]string, 0, 0) if s == nil { return emptyArr From 0454d4895591d0b9ca9d5d5b83baa46f48c4d70d Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 10 Dec 2024 15:31:59 +0300 Subject: [PATCH 009/161] dev: add namespace fn to chi --- pkg/apis/clickhouse.altinity.com/v1/type_chi.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/apis/clickhouse.altinity.com/v1/type_chi.go b/pkg/apis/clickhouse.altinity.com/v1/type_chi.go index f152842aa..1ab74f4c2 100644 --- a/pkg/apis/clickhouse.altinity.com/v1/type_chi.go +++ b/pkg/apis/clickhouse.altinity.com/v1/type_chi.go @@ -687,3 +687,7 @@ func (cr *ClickHouseInstallation) IsZero() bool { func (cr *ClickHouseInstallation) IsNonZero() bool { return cr != nil } + +func (cr *ClickHouseInstallation) NamespaceName() (string, string) { + return util.NamespaceName(cr) +} From dee99ad9e3dd55eaf1ede58c1a0ba4637da56db9 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 10 Dec 2024 15:32:18 +0300 Subject: [PATCH 010/161] devL chi status simplififcation --- .../clickhouse.altinity.com/v1/type_status.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pkg/apis/clickhouse.altinity.com/v1/type_status.go b/pkg/apis/clickhouse.altinity.com/v1/type_status.go index 3f6709682..4ac023f90 100644 --- a/pkg/apis/clickhouse.altinity.com/v1/type_status.go +++ b/pkg/apis/clickhouse.altinity.com/v1/type_status.go @@ -735,14 +735,14 @@ func (s *Status) GetEndpoint() string { // GetNormalizedCR gets target CR func (s *Status) GetNormalizedCR() *ClickHouseInstallation { - return getInstallationWithReadLock(s, func(s *Status) 
*ClickHouseInstallation { + return getCRWithReadLock(s, func(s *Status) *ClickHouseInstallation { return s.NormalizedCR }) } // GetNormalizedCRCompleted gets completed CR func (s *Status) GetNormalizedCRCompleted() *ClickHouseInstallation { - return getInstallationWithReadLock(s, func(s *Status) *ClickHouseInstallation { + return getCRWithReadLock(s, func(s *Status) *ClickHouseInstallation { return s.NormalizedCRCompleted }) } @@ -756,7 +756,7 @@ func (s *Status) GetHostsWithTablesCreated() []string { // Begin helpers -func doWithWriteLock(s *Status, f func(s *Status)) { +func doWithWriteLock(s *Status, f func(*Status)) { if s == nil { return } @@ -766,7 +766,7 @@ func doWithWriteLock(s *Status, f func(s *Status)) { f(s) } -func doWithReadLock(s *Status, f func(s *Status)) { +func doWithReadLock(s *Status, f func(*Status)) { if s == nil { return } @@ -776,7 +776,7 @@ func doWithReadLock(s *Status, f func(s *Status)) { f(s) } -func getIntWithReadLock(s *Status, f func(s *Status) int) int { +func getIntWithReadLock(s *Status, f func(*Status) int) int { var zeroVal int if s == nil { return zeroVal @@ -787,7 +787,7 @@ func getIntWithReadLock(s *Status, f func(s *Status) int) int { return f(s) } -func getStringWithReadLock(s *Status, f func(s *Status) string) string { +func getStringWithReadLock(s *Status, f func(*Status) string) string { var zeroVal string if s == nil { return zeroVal @@ -798,7 +798,7 @@ func getStringWithReadLock(s *Status, f func(s *Status) string) string { return f(s) } -func getInstallationWithReadLock(s *Status, f func(s *Status) *ClickHouseInstallation) *ClickHouseInstallation { +func getCRWithReadLock(s *Status, f func(*Status) *ClickHouseInstallation) *ClickHouseInstallation { var zeroVal *ClickHouseInstallation if s == nil { return zeroVal @@ -809,7 +809,7 @@ func getInstallationWithReadLock(s *Status, f func(s *Status) *ClickHouseInstall return f(s) } -func getStringArrWithReadLock(s *Status, f func(s *Status) []string) []string { +func getStringArrWithReadLock(s *Status, f func(*Status) []string) []string { emptyArr := make([]string, 0, 0) if s == nil { return emptyArr From 0bd42bff6e16f1ee1a813f95cbef7e6b3bdde005 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 10 Dec 2024 15:33:07 +0300 Subject: [PATCH 011/161] dev: start chi status split --- pkg/controller/chi/kube/cr.go | 107 ++++++++++++++++++++++++++++------ 1 file changed, 88 insertions(+), 19 deletions(-) diff --git a/pkg/controller/chi/kube/cr.go b/pkg/controller/chi/kube/cr.go index 1b26f5290..95a4b6552 100644 --- a/pkg/controller/chi/kube/cr.go +++ b/pkg/controller/chi/kube/cr.go @@ -19,6 +19,13 @@ import ( "fmt" "time" + "gopkg.in/yaml.v3" + + core "k8s.io/api/core/v1" + apiErrors "k8s.io/apimachinery/pkg/api/errors" + meta "k8s.io/apimachinery/pkg/apis/meta/v1" + kube "k8s.io/client-go/kubernetes" + log "github.com/altinity/clickhouse-operator/pkg/announcer" api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" commonTypes "github.com/altinity/clickhouse-operator/pkg/apis/common/types" @@ -29,11 +36,13 @@ import ( type CR struct { chopClient chopClientSet.Interface + kubeClient kube.Interface } -func NewCR(chopClient chopClientSet.Interface) *CR { +func NewCR(chopClient chopClientSet.Interface, kubeClient kube.Interface) *CR { return &CR{ chopClient: chopClient, + kubeClient: kubeClient, } } @@ -41,19 +50,23 @@ func (c *CR) Get(ctx context.Context, namespace, name string) (api.ICustomResour return 
c.chopClient.ClickhouseV1().ClickHouseInstallations(namespace).Get(ctx, name, controller.NewGetOptions()) } -// updateCHIObjectStatus updates ClickHouseInstallation object's Status -func (c *CR) StatusUpdate(ctx context.Context, cr api.ICustomResource, opts commonTypes.UpdateStatusOptions) (err error) { +// StatusUpdate updates CR object's Status +func (c *CR) StatusUpdate(ctx context.Context, cr api.ICustomResource, opts commonTypes.UpdateStatusOptions) error { if util.IsContextDone(ctx) { log.V(2).Info("task is done") return nil } + return c.statusUpdateRetry(ctx, cr, opts) +} + +func (c *CR) statusUpdateRetry(ctx context.Context, cr api.ICustomResource, opts commonTypes.UpdateStatusOptions) (err error) { for retry, attempt := true, 1; retry; attempt++ { if attempt > 60 { retry = false } - err = c.doUpdateCRStatus(ctx, cr, opts) + err = c.statusUpdateProcess(ctx, cr, opts) if err == nil { return nil } @@ -68,16 +81,16 @@ func (c *CR) StatusUpdate(ctx context.Context, cr api.ICustomResource, opts comm return } -// doUpdateCRStatus updates ClickHouseInstallation object's Status -func (c *CR) doUpdateCRStatus(ctx context.Context, cr api.ICustomResource, opts commonTypes.UpdateStatusOptions) error { +// statusUpdateProcess updates CR object's Status +func (c *CR) statusUpdateProcess(ctx context.Context, icr api.ICustomResource, opts commonTypes.UpdateStatusOptions) error { if util.IsContextDone(ctx) { log.V(2).Info("task is done") return nil } - chi := cr.(*api.ClickHouseInstallation) - namespace, name := util.NamespaceName(chi) - log.V(3).M(chi).F().Info("Update CHI status") + cr := icr.(*api.ClickHouseInstallation) + namespace, name := cr.NamespaceName() + log.V(3).M(cr).F().Info("Update CR status") _cur, err := c.Get(ctx, namespace, name) cur := _cur.(*api.ClickHouseInstallation) @@ -85,34 +98,34 @@ func (c *CR) doUpdateCRStatus(ctx context.Context, cr api.ICustomResource, opts if opts.TolerateAbsence { return nil } - log.V(1).M(chi).F().Error("%q", err) + log.V(1).M(cr).F().Error("%q", err) return err } if cur == nil { if opts.TolerateAbsence { return nil } - log.V(1).M(chi).F().Error("NULL returned") + log.V(1).M(cr).F().Error("NULL returned") return fmt.Errorf("ERROR GetCR (%s/%s): NULL returned", namespace, name) } - // Update status of a real object. - cur.EnsureStatus().CopyFrom(chi.Status, opts.CopyStatusOptions) + // Update status of a real (current) object. + cur.EnsureStatus().CopyFrom(cr.Status, opts.CopyStatusOptions) - _, err = c.chopClient.ClickhouseV1().ClickHouseInstallations(chi.GetNamespace()).UpdateStatus(ctx, cur, controller.NewUpdateOptions()) + err = c.statusUpdate(ctx, cur) if err != nil { // Error update - log.V(2).M(chi).F().Info("Got error upon update, may retry. err: %q", err) + log.V(2).M(cr).F().Info("Got error upon update, may retry. 
err: %q", err) return err } _cur, err = c.Get(ctx, namespace, name) cur = _cur.(*api.ClickHouseInstallation) - // Propagate updated ResourceVersion into chi - if chi.GetResourceVersion() != cur.GetResourceVersion() { - log.V(3).M(chi).F().Info("ResourceVersion change: %s to %s", chi.GetResourceVersion(), cur.GetResourceVersion()) - chi.SetResourceVersion(cur.GetResourceVersion()) + // Propagate updated ResourceVersion upstairs into the CR + if cr.GetResourceVersion() != cur.GetResourceVersion() { + log.V(3).M(cr).F().Info("ResourceVersion change: %s to %s", cr.GetResourceVersion(), cur.GetResourceVersion()) + cr.SetResourceVersion(cur.GetResourceVersion()) return nil } @@ -120,3 +133,59 @@ func (c *CR) doUpdateCRStatus(ctx context.Context, cr api.ICustomResource, opts return nil } + +func (c *CR) statusUpdate(ctx context.Context, chi *api.ClickHouseInstallation) error { + chi, cm := c.buildResources(chi) + + err := c.statusUpdateCR(ctx, chi) + if err != nil { + return err + } + + err = c.statusUpdateCM(ctx, cm) + if err != nil { + return err + } + + return nil +} + +func (c *CR) buildResources(chi *api.ClickHouseInstallation) (*api.ClickHouseInstallation, *core.ConfigMap) { + var normalized, normalizedCompleted []byte + if chi.Status.NormalizedCR != nil { + normalized, _ = yaml.Marshal(chi.Status.NormalizedCR) + } + if chi.Status.NormalizedCRCompleted != nil { + normalizedCompleted, _ = yaml.Marshal(chi.Status.NormalizedCRCompleted) + } + cm := &core.ConfigMap{ + ObjectMeta: meta.ObjectMeta{ + Namespace: chi.GetNamespace(), + Name: chi.GetName(), + }, + Data: map[string]string{ + "status-normalized": string(normalized), + "status-normalizedCompleted": string(normalizedCompleted), + }, + } + chi.Status.NormalizedCR = nil + chi.Status.NormalizedCRCompleted = nil + return chi, cm +} + +func (c *CR) statusUpdateCR(ctx context.Context, chi *api.ClickHouseInstallation) error { + _, err := c.chopClient.ClickhouseV1().ClickHouseInstallations(chi.GetNamespace()).UpdateStatus(ctx, chi, controller.NewUpdateOptions()) + return err +} + +func (c *CR) statusUpdateCM(ctx context.Context, cm *core.ConfigMap) error { + if cm == nil { + return nil + } + cmm := NewConfigMap(c.kubeClient) + _, err := cmm.Update(ctx, cm) + if apiErrors.IsNotFound(err) { + _, err = cmm.Create(ctx, cm) + } + return err +} From f61e08af266bab22396f7ef59507e188d4eeeb4f Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 10 Dec 2024 15:33:28 +0300 Subject: [PATCH 012/161] dev: format --- pkg/controller/chk/kube/config-map.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/chk/kube/config-map.go b/pkg/controller/chk/kube/config-map.go index 558cdc581..0ff88cd18 100644 --- a/pkg/controller/chk/kube/config-map.go +++ b/pkg/controller/chk/kube/config-map.go @@ -16,10 +16,10 @@ package kube import ( "context" - "k8s.io/apimachinery/pkg/labels" core "k8s.io/api/core/v1" meta "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" ) From c4d07fd6961222dd92dca57e84c3a74fe6cc0125 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 10 Dec 2024 15:33:46 +0300 Subject: [PATCH 013/161] dev: format --- pkg/controller/chk/kube/pdb.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/chk/kube/pdb.go b/pkg/controller/chk/kube/pdb.go index 39bcc33c4..6e9ccb5d9 100644 --- a/pkg/controller/chk/kube/pdb.go +++ b/pkg/controller/chk/kube/pdb.go @@ -16,10 +16,10 @@ package kube 
import ( "context" - "k8s.io/apimachinery/pkg/labels" policy "k8s.io/api/policy/v1" meta "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" ) From 086831c53fc1eddb320965302cf4cad24e155871 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 10 Dec 2024 15:34:41 +0300 Subject: [PATCH 014/161] dev: streamline cr update process --- pkg/controller/chk/kube/cr.go | 43 +++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/pkg/controller/chk/kube/cr.go b/pkg/controller/chk/kube/cr.go index 68363e40f..9ddc50476 100644 --- a/pkg/controller/chk/kube/cr.go +++ b/pkg/controller/chk/kube/cr.go @@ -20,13 +20,13 @@ import ( "time" "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" log "github.com/altinity/clickhouse-operator/pkg/announcer" apiChk "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse-keeper.altinity.com/v1" api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" commonTypes "github.com/altinity/clickhouse-operator/pkg/apis/common/types" "github.com/altinity/clickhouse-operator/pkg/util" - "sigs.k8s.io/controller-runtime/pkg/client" ) type CR struct { @@ -52,19 +52,23 @@ func (c *CR) Get(ctx context.Context, namespace, name string) (api.ICustomResour } } -// updateCHIObjectStatus updates ClickHouseInstallation object's Status -func (c *CR) StatusUpdate(ctx context.Context, cr api.ICustomResource, opts commonTypes.UpdateStatusOptions) (err error) { +// StatusUpdate updates CR object's Status +func (c *CR) StatusUpdate(ctx context.Context, cr api.ICustomResource, opts commonTypes.UpdateStatusOptions) error { if util.IsContextDone(ctx) { log.V(2).Info("task is done") return nil } + return c.statusUpdateRetry(ctx, cr, opts) +} + +func (c *CR) statusUpdateRetry(ctx context.Context, cr api.ICustomResource, opts commonTypes.UpdateStatusOptions) (err error) { for retry, attempt := true, 1; retry; attempt++ { if attempt > 60 { retry = false } - err = c.doUpdateCRStatus(ctx, cr, opts) + err = c.statusUpdateProcess(ctx, cr, opts) if err == nil { return nil } @@ -79,16 +83,16 @@ func (c *CR) StatusUpdate(ctx context.Context, cr api.ICustomResource, opts comm return } -// doUpdateCRStatus updates ClickHouseInstallation object's Status -func (c *CR) doUpdateCRStatus(ctx context.Context, cr api.ICustomResource, opts commonTypes.UpdateStatusOptions) error { +// statusUpdateProcess updates CR object's Status +func (c *CR) statusUpdateProcess(ctx context.Context, icr api.ICustomResource, opts commonTypes.UpdateStatusOptions) error { if util.IsContextDone(ctx) { log.V(2).Info("task is done") return nil } - chk := cr.(*apiChk.ClickHouseKeeperInstallation) - namespace, name := util.NamespaceName(chk) - log.V(3).M(chk).F().Info("Update CHK status") + cr := icr.(*apiChk.ClickHouseKeeperInstallation) + namespace, name := cr.NamespaceName() + log.V(3).M(cr).F().Info("Update CR status") _cur, err := c.Get(ctx, namespace, name) cur := _cur.(*apiChk.ClickHouseKeeperInstallation) @@ -96,24 +100,24 @@ func (c *CR) doUpdateCRStatus(ctx context.Context, cr api.ICustomResource, opts if opts.TolerateAbsence { return nil } - log.V(1).M(chk).F().Error("%q", err) + log.V(1).M(cr).F().Error("%q", err) return err } if cur == nil { if opts.TolerateAbsence { return nil } - log.V(1).M(chk).F().Error("NULL returned") + log.V(1).M(cr).F().Error("NULL returned") return fmt.Errorf("ERROR GetCR (%s/%s): NULL returned", 
namespace, name) } // Update status of a real object. - cur.EnsureStatus().CopyFrom(chk.Status, opts.CopyStatusOptions) + cur.EnsureStatus().CopyFrom(cr.Status, opts.CopyStatusOptions) - err = c.kubeClient.Status().Update(ctx, cur) + err = c.statusUpdate(ctx, cur) if err != nil { // Error update - log.V(2).M(chk).F().Info("Got error upon update, may retry. err: %q", err) + log.V(2).M(cr).F().Info("Got error upon update, may retry. err: %q", err) return err } @@ -121,9 +125,9 @@ func (c *CR) doUpdateCRStatus(ctx context.Context, cr api.ICustomResource, opts cur = _cur.(*apiChk.ClickHouseKeeperInstallation) // Propagate updated ResourceVersion into chi - if chk.GetResourceVersion() != cur.GetResourceVersion() { - log.V(3).M(chk).F().Info("ResourceVersion change: %s to %s", chk.GetResourceVersion(), cur.GetResourceVersion()) - chk.SetResourceVersion(cur.GetResourceVersion()) + if cr.GetResourceVersion() != cur.GetResourceVersion() { + log.V(3).M(cr).F().Info("ResourceVersion change: %s to %s", cr.GetResourceVersion(), cur.GetResourceVersion()) + cr.SetResourceVersion(cur.GetResourceVersion()) return nil } @@ -131,3 +135,8 @@ func (c *CR) doUpdateCRStatus(ctx context.Context, cr api.ICustomResource, opts return nil } + +func (c *CR) statusUpdate(ctx context.Context, chk *apiChk.ClickHouseKeeperInstallation) error { + err := c.kubeClient.Status().Update(ctx, chk) + return err +} From 169890d1ef58845457336ab645ea7b743eda19ee Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 10 Dec 2024 15:34:57 +0300 Subject: [PATCH 015/161] dev: format --- pkg/controller/chk/kube/statesfulset.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/controller/chk/kube/statesfulset.go b/pkg/controller/chk/kube/statesfulset.go index 3db13312c..5c87d270c 100644 --- a/pkg/controller/chk/kube/statesfulset.go +++ b/pkg/controller/chk/kube/statesfulset.go @@ -16,6 +16,7 @@ package kube import ( "context" + "gopkg.in/yaml.v3" apps "k8s.io/api/apps/v1" From 9999a0a15b58bbc324dba677a67c4d317fc49ef0 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 10 Dec 2024 15:35:23 +0300 Subject: [PATCH 016/161] dev: pass all kube components into adapter --- pkg/controller/chi/kube/adapter-kube.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/chi/kube/adapter-kube.go b/pkg/controller/chi/kube/adapter-kube.go index 4c06dd190..775d7d7f3 100644 --- a/pkg/controller/chi/kube/adapter-kube.go +++ b/pkg/controller/chi/kube/adapter-kube.go @@ -49,7 +49,7 @@ func NewAdapter(kubeClient kube.Interface, chopClient chopClientSet.Interface, n kubeClient: kubeClient, namer: namer, - cr: NewCR(chopClient), + cr: NewCR(chopClient, kubeClient), configMap: NewConfigMap(kubeClient), deployment: NewDeployment(kubeClient), From 72caef84e7c80a8a92d2af2e2ea8d4cbd122b89f Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 10 Dec 2024 16:35:56 +0300 Subject: [PATCH 017/161] dev: split getter --- pkg/controller/chi/kube/cr.go | 44 +++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/pkg/controller/chi/kube/cr.go b/pkg/controller/chi/kube/cr.go index 95a4b6552..5db6eaa6b 100644 --- a/pkg/controller/chi/kube/cr.go +++ b/pkg/controller/chi/kube/cr.go @@ -47,9 +47,30 @@ func NewCR(chopClient chopClientSet.Interface, kubeClient kube.Interface) *CR { } func (c *CR) Get(ctx context.Context, namespace, name string) (api.ICustomResource, error) { + chi, err := c.getCR(ctx, namespace, name) + if err != nil { + return nil, err + } + + cm, _ := c.getCM(ctx, chi) + 
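+	// The bulky normalized-status parts are offloaded to a companion ConfigMap;
+	// buildCR below merges them back into the fetched CR (fleshed out in the next commit).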
+
+	chi = c.buildCR(chi, cm)
+
+	return chi, nil
+}
+
+func (c *CR) getCR(ctx context.Context, namespace, name string) (*api.ClickHouseInstallation, error) {
 	return c.chopClient.ClickhouseV1().ClickHouseInstallations(namespace).Get(ctx, name, controller.NewGetOptions())
 }
 
+func (c *CR) getCM(ctx context.Context, chi api.ICustomResource) (*core.ConfigMap, error) {
+	return NewConfigMap(c.kubeClient).Get(ctx, c.buildCMNamespace(chi), c.buildCMName(chi))
+}
+
+func (c *CR) buildCR(chi *api.ClickHouseInstallation, cm *core.ConfigMap) *api.ClickHouseInstallation {
+	return chi
+}
+
 // StatusUpdate updates CR object's Status
 func (c *CR) StatusUpdate(ctx context.Context, cr api.ICustomResource, opts commonTypes.UpdateStatusOptions) error {
 	if util.IsContextDone(ctx) {
@@ -146,7 +167,7 @@ func (c *CR) statusUpdate(ctx context.Context, chi *api.ClickHouseInstallation)
 	if err != nil {
 		return err
 	}
-	
+
 	return nil
 }
 
@@ -160,12 +181,12 @@ func (c *CR) buildResources(chi *api.ClickHouseIns
 	}
 	cm := &core.ConfigMap{
 		ObjectMeta: meta.ObjectMeta{
-			Namespace: chi.GetNamespace(),
-			Name:      chi.GetName(),
+			Namespace: c.buildCMNamespace(chi),
+			Name:      c.buildCMName(chi),
 		},
 		Data: map[string]string{
-			"status-normalized":          string(normalized),
-			"status-normalizedCompleted": string(normalizedCompleted),
+			statusNormalized:          string(normalized),
+			statusNormalizedCompleted: string(normalizedCompleted),
 		},
 	}
 	chi.Status.NormalizedCR = nil
@@ -189,3 +210,16 @@ func (c *CR) statusUpdateCM(ctx context.Context, cm *core.ConfigMap) error {
 	}
 	return err
 }
+
+func (c *CR) buildCMNamespace(obj meta.Object) string {
+	return obj.GetNamespace()
+}
+
+func (c *CR) buildCMName(obj meta.Object) string {
+	return obj.GetName()
+}
+
+const (
+	statusNormalized          = "status-normalized"
+	statusNormalizedCompleted = "status-normalizedCompleted"
+)
\ No newline at end of file

From 9033060a523263a74a29725f8c4357071dafe296 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Wed, 11 Dec 2024 14:37:26 +0300
Subject: [PATCH 018/161] dev: populate status fields

---
 pkg/controller/chi/kube/cr.go | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/pkg/controller/chi/kube/cr.go b/pkg/controller/chi/kube/cr.go
index 5db6eaa6b..3cf8d0b2d 100644
--- a/pkg/controller/chi/kube/cr.go
+++ b/pkg/controller/chi/kube/cr.go
@@ -68,6 +68,26 @@ func (c *CR) getCM(ctx context.Context, chi api.ICustomResource) (*core.ConfigMa
 }
 
 func (c *CR) buildCR(chi *api.ClickHouseInstallation, cm *core.ConfigMap) *api.ClickHouseInstallation {
+	if cm == nil {
+		return chi
+	}
+
+	if len(cm.Data[statusNormalized]) > 0 {
+		normalized := &api.ClickHouseInstallation{}
+		if yaml.Unmarshal([]byte(cm.Data[statusNormalized]), normalized) != nil {
+			return chi
+		}
+		chi.EnsureStatus().NormalizedCR = normalized
+	}
+
+	if len(cm.Data[statusNormalizedCompleted]) > 0 {
+		normalizedCompleted := &api.ClickHouseInstallation{}
+		if yaml.Unmarshal([]byte(cm.Data[statusNormalizedCompleted]), normalizedCompleted) != nil {
+			return chi
+		}
+		chi.EnsureStatus().NormalizedCRCompleted = normalizedCompleted
+	}
+
 	return chi
 }
 

From 68ede26c438ec72f324cc68170b781fb116cba7a Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Wed, 11 Dec 2024 14:40:05 +0300
Subject: [PATCH 019/161] test: regression

---
 tests/regression.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/regression.py b/tests/regression.py
index e669dc05b..b4fa3bb70 100755
--- a/tests/regression.py
+++ b/tests/regression.py
@@ -8,7 +8,8 @@
 xfails = {
     # 
test_operator.py "/regression/e2e.test_operator/test_008*": [(Fail, "Test 008 sometimes fails due to unknown reasons")], - "/regression/e2e.test_operator/test_032:": [(Fail, "Test 032 sometimes fails due to unknown reasons")], + "/regression/e2e.test_operator/test_014*": [(Fail, "Test 014 sometimes fails due to unknown reasons")], + "/regression/e2e.test_operator/test_032*": [(Fail, "Test 032 sometimes fails due to unknown reasons")], # test_clickhouse.py "/regression/e2e.test_clickhouse/test_ch_001*": [(Fail, "Insert Quorum test need to refactoring")], # test_metrics_alerts.py From cf52a0ddf04cd2ee8f57d25a5e3c4dd72cf33025 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Wed, 11 Dec 2024 14:40:32 +0300 Subject: [PATCH 020/161] test: disable normalized field check --- tests/e2e/test_operator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index e5a5a3ccb..fdc9ec6f5 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -856,8 +856,8 @@ def test_011_2(self): with Then("Default user plain password should be removed"): chi = kubectl.get("chi", "test-011-secured-default") - assert "default/password" in chi["status"]["normalizedCompleted"]["spec"]["configuration"]["users"] - assert chi["status"]["normalizedCompleted"]["spec"]["configuration"]["users"]["default/password"] == "" + # assert "default/password" in chi["status"]["normalizedCompleted"]["spec"]["configuration"]["users"] + # assert chi["status"]["normalizedCompleted"]["spec"]["configuration"]["users"]["default/password"] == "" cfm = kubectl.get("configmap", "chi-test-011-secured-default-common-usersd") assert '' in cfm["data"]["chop-generated-users.xml"] @@ -2617,8 +2617,8 @@ def test_023(self): assert kubectl.get_field("chi", chi, ".status.usedTemplates[1].name") == "extension-annotations" # assert kubectl.get_field("chi", chi, ".status.usedTemplates[2].name") == "" - with Then("Annotation from a template should be populated"): - assert kubectl.get_field("chi", chi, ".status.normalizedCompleted.metadata.annotations.test") == "test" + # with Then("Annotation from a template should be populated"): + # assert kubectl.get_field("chi", chi, ".status.normalizedCompleted.metadata.annotations.test") == "test" with Then("Pod annotation should populated from template"): assert kubectl.get_field("pod", f"chi-{chi}-single-0-0-0", ".metadata.annotations.test") == "test" with Then("Environment variable from a template should be populated"): From 44598fa9244dd029956e3c48aaf0726001616155 Mon Sep 17 00:00:00 2001 From: Amir Alavi Date: Wed, 11 Dec 2024 06:43:37 -0500 Subject: [PATCH 021/161] feat: ability to suspend `ClickHouseInstallation` and `ClickHouseKeeper` reconciliation (#1580) For ClickHouse technical maintenance or debugging, it would be ideal to suspend reconciliation. 
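As an illustration only (a minimal, hypothetical manifest; resource names
are placeholders), suspension is driven by the new `spec.suspend` field
added in this patch:

    apiVersion: clickhouse.altinity.com/v1
    kind: ClickHouseInstallation
    metadata:
      name: demo
    spec:
      suspend: "true"    # operator skips reconciling this CR while set
      configuration:
        clusters:
          - name: default

Setting `suspend: "false"` (or omitting the field) resumes normal
reconciliation.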
For our use-case, we plan on suspending CRs during Blue/Green cluster upgrades, so as we move workloads from one cluster to another (one namespace at a time), we would need the operator to suspend any reconciliation and only resume once the namespace is fully migrated to the new cluster Signed-off-by: Amir Alavi --- cmd/operator/app/thread_keeper.go | 38 +++++- cmd/operator/app/thread_keeper_test.go | 116 ++++++++++++++++++ ...l-template-01-section-crd-01-chi-chit.yaml | 12 ++ ...l-yaml-template-01-section-crd-03-chk.yaml | 12 ++ ...installations.clickhouse.altinity.com.yaml | 37 ++++++ ...tiontemplates.clickhouse.altinity.com.yaml | 37 ++++++ ...ations.clickhouse-keeper.altinity.com.yaml | 37 ++++++ .../ServiceAccount-clickhouse-operator.yaml | 1 - .../clickhouse-operator-install-ansible.yaml | 36 ++++++ ...house-operator-install-bundle-v1beta1.yaml | 35 +++++- .../clickhouse-operator-install-bundle.yaml | 36 ++++++ ...use-operator-install-template-v1beta1.yaml | 35 +++++- .../clickhouse-operator-install-template.yaml | 36 ++++++ .../clickhouse-operator-install-tf.yaml | 36 ++++++ deploy/operator/parts/crd.yaml | 111 +++++++++++++++++ .../v1/type_spec.go | 7 ++ .../v1/zz_generated.deepcopy.go | 5 + .../clickhouse.altinity.com/v1/type_spec.go | 8 ++ .../v1/zz_generated.deepcopy.go | 5 + pkg/controller/chi/controller.go | 18 ++- pkg/controller/chi/controller_test.go | 47 +++++++ pkg/controller/chk/worker-chk-reconciler.go | 5 + 22 files changed, 704 insertions(+), 6 deletions(-) create mode 100644 cmd/operator/app/thread_keeper_test.go create mode 100644 pkg/controller/chi/controller_test.go diff --git a/cmd/operator/app/thread_keeper.go b/cmd/operator/app/thread_keeper.go index b200970be..19614d84d 100644 --- a/cmd/operator/app/thread_keeper.go +++ b/cmd/operator/app/thread_keeper.go @@ -10,8 +10,11 @@ import ( clientGoScheme "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" ctrlRuntime "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/cache" + "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/predicate" // ctrl "sigs.k8s.io/controller-runtime/pkg/controller" @@ -56,7 +59,7 @@ func initKeeper(ctx context.Context) error { err = ctrlRuntime. NewControllerManagedBy(manager). - For(&api.ClickHouseKeeperInstallation{}). + For(&api.ClickHouseKeeperInstallation{}, builder.WithPredicates(keeperPredicate())). Owns(&apps.StatefulSet{}). 
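+		// keeperPredicate (below) drops create/update events for suspended
+		// CHK resources, so they never reach the reconcile loop.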
Complete( &controller.Controller{ @@ -81,3 +84,36 @@ func runKeeper(ctx context.Context) error { // Run successful return nil } + +func keeperPredicate() predicate.Funcs { + return predicate.Funcs{ + CreateFunc: func(e event.CreateEvent) bool { + obj, ok := e.Object.(*api.ClickHouseKeeperInstallation) + if !ok { + return false + } + + if obj.Spec.Suspend.Value() { + return false + } + return true + }, + DeleteFunc: func(e event.DeleteEvent) bool { + return true + }, + UpdateFunc: func(e event.UpdateEvent) bool { + obj, ok := e.ObjectNew.(*api.ClickHouseKeeperInstallation) + if !ok { + return false + } + + if obj.Spec.Suspend.Value() { + return false + } + return true + }, + GenericFunc: func(e event.GenericEvent) bool { + return true + }, + } +} diff --git a/cmd/operator/app/thread_keeper_test.go b/cmd/operator/app/thread_keeper_test.go new file mode 100644 index 000000000..6da3d7b81 --- /dev/null +++ b/cmd/operator/app/thread_keeper_test.go @@ -0,0 +1,116 @@ +package app + +import ( + "testing" + + "github.com/altinity/clickhouse-operator/pkg/apis/common/types" + "sigs.k8s.io/controller-runtime/pkg/event" + + api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse-keeper.altinity.com/v1" +) + +func Test_keeperPredicateCreate(t *testing.T) { + tests := []struct { + name string + want bool + evt event.CreateEvent + }{ + { + name: "skips create when suspended", + want: false, + evt: event.CreateEvent{ + Object: &api.ClickHouseKeeperInstallation{ + Spec: api.ChkSpec{ + Suspend: types.NewStringBool(true), + }, + }, + }, + }, + { + name: "queues create when not suspended", + want: true, + evt: event.CreateEvent{ + Object: &api.ClickHouseKeeperInstallation{ + Spec: api.ChkSpec{ + Suspend: types.NewStringBool(false), + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + predicate := keeperPredicate() + if got := predicate.Create(tt.evt); tt.want != got { + t.Errorf("keeperPredicate.Create() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_keeperPredicateUpdate(t *testing.T) { + tests := []struct { + name string + want bool + evt event.UpdateEvent + }{ + { + name: "skips update when suspended", + want: false, + evt: event.UpdateEvent{ + ObjectNew: &api.ClickHouseKeeperInstallation{ + Spec: api.ChkSpec{ + Suspend: types.NewStringBool(true), + }, + }, + }, + }, + { + name: "queues update when not suspended", + want: true, + evt: event.UpdateEvent{ + ObjectNew: &api.ClickHouseKeeperInstallation{ + Spec: api.ChkSpec{ + Suspend: types.NewStringBool(false), + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + predicate := keeperPredicate() + if got := predicate.Update(tt.evt); tt.want != got { + t.Errorf("keeperPredicate.Update() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_keeperPredicateDelete(t *testing.T) { + tests := []struct { + name string + want bool + evt event.DeleteEvent + }{ + { + name: "deletes even when suspended", + want: true, + evt: event.DeleteEvent{ + Object: &api.ClickHouseKeeperInstallation{ + Spec: api.ChkSpec{ + Suspend: types.NewStringBool(true), + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + predicate := keeperPredicate() + if got := predicate.Delete(tt.evt); tt.want != got { + t.Errorf("keeperPredicate.Delete() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-01-section-crd-01-chi-chit.yaml 
b/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-01-section-crd-01-chi-chit.yaml index 2ef4e8bfc..901b89b54 100644 --- a/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-01-section-crd-01-chi-chit.yaml +++ b/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-01-section-crd-01-chi-chit.yaml @@ -92,6 +92,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -308,6 +313,13 @@ spec: enum: - "" - "RollingUpdate" + suspend: + <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. + Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. + - When `suspend` is `false` or not set, operator reconciles all resources. troubleshoot: <<: *TypeStringBool description: | diff --git a/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-01-section-crd-03-chk.yaml b/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-01-section-crd-03-chk.yaml index a6aa1616b..3c036913c 100644 --- a/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-01-section-crd-03-chk.yaml +++ b/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-01-section-crd-03-chk.yaml @@ -88,6 +88,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -296,6 +301,13 @@ spec: - "disabled" - "Enabled" - "enabled" + suspend: + <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Keeper. + Works as the following: + - When `suspend` is `true` operator stops reconciling all keeper resources. + - When `suspend` is `false` or not set, operator reconciles all keeper resources. namespaceDomainPattern: type: string description: | diff --git a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallations.clickhouse.altinity.com.yaml b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallations.clickhouse.altinity.com.yaml index f14e5fb20..215526181 100644 --- a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallations.clickhouse.altinity.com.yaml +++ b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallations.clickhouse.altinity.com.yaml @@ -92,6 +92,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -308,6 +313,38 @@ spec: enum: - "" - "RollingUpdate" + suspend: &TypeStringBool + type: string + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. + Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. + - When `suspend` is `false` or not set, operator reconciles all resources. 
+ enum: + # List StringBoolXXX constants from model + - "" + - "0" + - "1" + - "False" + - "false" + - "True" + - "true" + - "No" + - "no" + - "Yes" + - "yes" + - "Off" + - "off" + - "On" + - "on" + - "Disable" + - "disable" + - "Enable" + - "enable" + - "Disabled" + - "disabled" + - "Enabled" + - "enabled" troubleshoot: !!merge <<: *TypeStringBool description: | diff --git a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallationtemplates.clickhouse.altinity.com.yaml b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallationtemplates.clickhouse.altinity.com.yaml index a41cee7bc..1adc6d434 100644 --- a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallationtemplates.clickhouse.altinity.com.yaml +++ b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallationtemplates.clickhouse.altinity.com.yaml @@ -92,6 +92,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -308,6 +313,38 @@ spec: enum: - "" - "RollingUpdate" + suspend: &TypeStringBool + type: string + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. + Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. + - When `suspend` is `false` or not set, operator reconciles all resources. + enum: + # List StringBoolXXX constants from model + - "" + - "0" + - "1" + - "False" + - "false" + - "True" + - "true" + - "No" + - "no" + - "Yes" + - "yes" + - "Off" + - "off" + - "On" + - "on" + - "Disable" + - "disable" + - "Enable" + - "enable" + - "Disabled" + - "disabled" + - "Enabled" + - "enabled" troubleshoot: !!merge <<: *TypeStringBool description: | diff --git a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousekeeperinstallations.clickhouse-keeper.altinity.com.yaml b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousekeeperinstallations.clickhouse-keeper.altinity.com.yaml index 8a6db55cc..8221b2a0a 100644 --- a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousekeeperinstallations.clickhouse-keeper.altinity.com.yaml +++ b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousekeeperinstallations.clickhouse-keeper.altinity.com.yaml @@ -88,6 +88,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -296,6 +301,38 @@ spec: - "disabled" - "Enabled" - "enabled" + suspend: &TypeStringBool + type: string + description: | + Suspend reconciliation of resources managed by a ClickHouse Keeper. + Works as the following: + - When `suspend` is `true` operator stops reconciling all keeper resources. + - When `suspend` is `false` or not set, operator reconciles all keeper resources. 
+ enum: + # List StringBoolXXX constants from model + - "" + - "0" + - "1" + - "False" + - "false" + - "True" + - "true" + - "No" + - "no" + - "Yes" + - "yes" + - "Off" + - "off" + - "On" + - "on" + - "Disable" + - "disable" + - "Enable" + - "enable" + - "Disabled" + - "disabled" + - "Enabled" + - "enabled" namespaceDomainPattern: type: string description: | diff --git a/deploy/helm/clickhouse-operator/templates/generated/ServiceAccount-clickhouse-operator.yaml b/deploy/helm/clickhouse-operator/templates/generated/ServiceAccount-clickhouse-operator.yaml index 3bc8d89af..803619f97 100644 --- a/deploy/helm/clickhouse-operator/templates/generated/ServiceAccount-clickhouse-operator.yaml +++ b/deploy/helm/clickhouse-operator/templates/generated/ServiceAccount-clickhouse-operator.yaml @@ -13,7 +13,6 @@ metadata: namespace: {{ .Release.Namespace }} labels: {{ include "altinity-clickhouse-operator.labels" . | nindent 4 }} annotations: {{ toYaml .Values.serviceAccount.annotations | nindent 4 }} - # Template Parameters: # # NAMESPACE=kube-system diff --git a/deploy/operator/clickhouse-operator-install-ansible.yaml b/deploy/operator/clickhouse-operator-install-ansible.yaml index 682b93235..a6ac26f2d 100644 --- a/deploy/operator/clickhouse-operator-install-ansible.yaml +++ b/deploy/operator/clickhouse-operator-install-ansible.yaml @@ -99,6 +99,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -315,6 +320,13 @@ spec: enum: - "" - "RollingUpdate" + suspend: + <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. + Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. + - When `suspend` is `false` or not set, operator reconciles all resources. troubleshoot: <<: *TypeStringBool description: | @@ -1379,6 +1391,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -1595,6 +1612,13 @@ spec: enum: - "" - "RollingUpdate" + suspend: + <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. + Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. + - When `suspend` is `false` or not set, operator reconciles all resources. troubleshoot: <<: *TypeStringBool description: | @@ -3111,6 +3135,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -3319,6 +3348,13 @@ spec: - "disabled" - "Enabled" - "enabled" + suspend: + <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Keeper. + Works as the following: + - When `suspend` is `true` operator stops reconciling all keeper resources. + - When `suspend` is `false` or not set, operator reconciles all keeper resources. 
namespaceDomainPattern: type: string description: | diff --git a/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml b/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml index 57af7a0c2..d491ad103 100644 --- a/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml +++ b/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml @@ -88,6 +88,10 @@ spec: type: date description: Age of the resource JSONPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + JSONPath: .spec.suspend subresources: status: {} validation: @@ -304,6 +308,13 @@ spec: enum: - "" - "RollingUpdate" + suspend: + !!merge <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. + Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. + - When `suspend` is `false` or not set, operator reconciles all resources. troubleshoot: !!merge <<: *TypeStringBool description: | @@ -1359,6 +1370,10 @@ spec: type: date description: Age of the resource JSONPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + JSONPath: .spec.suspend validation: openAPIV3Schema: description: "define a set of Kubernetes resources (StatefulSet, PVC, Service, ConfigMap) which describe behavior one or more clusters" @@ -1573,6 +1588,13 @@ spec: enum: - "" - "RollingUpdate" + suspend: + !!merge <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. + Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. + - When `suspend` is `false` or not set, operator reconciles all resources. troubleshoot: !!merge <<: *TypeStringBool description: | @@ -3074,6 +3096,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -3282,6 +3309,13 @@ spec: - "disabled" - "Enabled" - "enabled" + suspend: + !!merge <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Keeper. + Works as the following: + - When `suspend` is `true` operator stops reconciling all keeper resources. + - When `suspend` is `false` or not set, operator reconciles all keeper resources. namespaceDomainPattern: type: string description: | @@ -3835,7 +3869,6 @@ metadata: namespace: kube-system labels: clickhouse.altinity.com/chop: 0.24.3 - # Template Parameters: # # NAMESPACE=kube-system diff --git a/deploy/operator/clickhouse-operator-install-bundle.yaml b/deploy/operator/clickhouse-operator-install-bundle.yaml index 80c563c4b..3720cfc30 100644 --- a/deploy/operator/clickhouse-operator-install-bundle.yaml +++ b/deploy/operator/clickhouse-operator-install-bundle.yaml @@ -92,6 +92,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -308,6 +313,13 @@ spec: enum: - "" - "RollingUpdate" + suspend: + <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. 
+ Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. + - When `suspend` is `false` or not set, operator reconciles all resources. troubleshoot: <<: *TypeStringBool description: | @@ -1372,6 +1384,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -1588,6 +1605,13 @@ spec: enum: - "" - "RollingUpdate" + suspend: + <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. + Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. + - When `suspend` is `false` or not set, operator reconciles all resources. troubleshoot: <<: *TypeStringBool description: | @@ -3104,6 +3128,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -3312,6 +3341,13 @@ spec: - "disabled" - "Enabled" - "enabled" + suspend: + <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Keeper. + Works as the following: + - When `suspend` is `true` operator stops reconciling all keeper resources. + - When `suspend` is `false` or not set, operator reconciles all keeper resources. namespaceDomainPattern: type: string description: | diff --git a/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml b/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml index bc7f69504..928d1ad15 100644 --- a/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml +++ b/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml @@ -88,6 +88,10 @@ spec: type: date description: Age of the resource JSONPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + JSONPath: .spec.suspend subresources: status: {} validation: @@ -304,6 +308,13 @@ spec: enum: - "" - "RollingUpdate" + suspend: + !!merge <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. + Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. + - When `suspend` is `false` or not set, operator reconciles all resources. troubleshoot: !!merge <<: *TypeStringBool description: | @@ -1359,6 +1370,10 @@ spec: type: date description: Age of the resource JSONPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + JSONPath: .spec.suspend validation: openAPIV3Schema: description: "define a set of Kubernetes resources (StatefulSet, PVC, Service, ConfigMap) which describe behavior one or more clusters" @@ -1573,6 +1588,13 @@ spec: enum: - "" - "RollingUpdate" + suspend: + !!merge <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. + Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. + - When `suspend` is `false` or not set, operator reconciles all resources. 
troubleshoot: !!merge <<: *TypeStringBool description: | @@ -3074,6 +3096,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -3282,6 +3309,13 @@ spec: - "disabled" - "Enabled" - "enabled" + suspend: + !!merge <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Keeper. + Works as the following: + - When `suspend` is `true` operator stops reconciling all keeper resources. + - When `suspend` is `false` or not set, operator reconciles all keeper resources. namespaceDomainPattern: type: string description: | @@ -3835,7 +3869,6 @@ metadata: namespace: ${OPERATOR_NAMESPACE} labels: clickhouse.altinity.com/chop: 0.24.3 - # Template Parameters: # # NAMESPACE=${OPERATOR_NAMESPACE} diff --git a/deploy/operator/clickhouse-operator-install-template.yaml b/deploy/operator/clickhouse-operator-install-template.yaml index fddc955ca..3dc0196de 100644 --- a/deploy/operator/clickhouse-operator-install-template.yaml +++ b/deploy/operator/clickhouse-operator-install-template.yaml @@ -92,6 +92,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -308,6 +313,13 @@ spec: enum: - "" - "RollingUpdate" + suspend: + <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. + Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. + - When `suspend` is `false` or not set, operator reconciles all resources. troubleshoot: <<: *TypeStringBool description: | @@ -1372,6 +1384,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -1588,6 +1605,13 @@ spec: enum: - "" - "RollingUpdate" + suspend: + <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. + Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. + - When `suspend` is `false` or not set, operator reconciles all resources. troubleshoot: <<: *TypeStringBool description: | @@ -3104,6 +3128,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -3312,6 +3341,13 @@ spec: - "disabled" - "Enabled" - "enabled" + suspend: + <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Keeper. + Works as the following: + - When `suspend` is `true` operator stops reconciling all keeper resources. + - When `suspend` is `false` or not set, operator reconciles all keeper resources. 
namespaceDomainPattern: type: string description: | diff --git a/deploy/operator/clickhouse-operator-install-tf.yaml b/deploy/operator/clickhouse-operator-install-tf.yaml index a017e5021..a5588e0b0 100644 --- a/deploy/operator/clickhouse-operator-install-tf.yaml +++ b/deploy/operator/clickhouse-operator-install-tf.yaml @@ -99,6 +99,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -315,6 +320,13 @@ spec: enum: - "" - "RollingUpdate" + suspend: + <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. + Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. + - When `suspend` is `false` or not set, operator reconciles all resources. troubleshoot: <<: *TypeStringBool description: | @@ -1379,6 +1391,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -1595,6 +1612,13 @@ spec: enum: - "" - "RollingUpdate" + suspend: + <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. + Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. + - When `suspend` is `false` or not set, operator reconciles all resources. troubleshoot: <<: *TypeStringBool description: | @@ -3111,6 +3135,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -3319,6 +3348,13 @@ spec: - "disabled" - "Enabled" - "enabled" + suspend: + <<: *TypeStringBool + description: | + Suspend reconciliation of resources managed by a ClickHouse Keeper. + Works as the following: + - When `suspend` is `true` operator stops reconciling all keeper resources. + - When `suspend` is `false` or not set, operator reconciles all keeper resources. namespaceDomainPattern: type: string description: | diff --git a/deploy/operator/parts/crd.yaml b/deploy/operator/parts/crd.yaml index 22481014c..b980729c7 100644 --- a/deploy/operator/parts/crd.yaml +++ b/deploy/operator/parts/crd.yaml @@ -92,6 +92,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -308,6 +313,38 @@ spec: enum: - "" - "RollingUpdate" + suspend: + type: string + enum: + # List StringBoolXXX constants from model + - "" + - "0" + - "1" + - "False" + - "false" + - "True" + - "true" + - "No" + - "no" + - "Yes" + - "yes" + - "Off" + - "off" + - "On" + - "on" + - "Disable" + - "disable" + - "Enable" + - "enable" + - "Disabled" + - "disabled" + - "Enabled" + - "enabled" + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. + Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. 
+ - When `suspend` is `false` or not set, operator reconciles all resources. troubleshoot: type: string enum: @@ -1997,6 +2034,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -2213,6 +2255,38 @@ spec: enum: - "" - "RollingUpdate" + suspend: + type: string + enum: + # List StringBoolXXX constants from model + - "" + - "0" + - "1" + - "False" + - "false" + - "True" + - "true" + - "No" + - "no" + - "Yes" + - "yes" + - "Off" + - "off" + - "On" + - "on" + - "Disable" + - "disable" + - "Enable" + - "enable" + - "Disabled" + - "disabled" + - "Enabled" + - "enabled" + description: | + Suspend reconciliation of resources managed by a ClickHouse Installation. + Works as the following: + - When `suspend` is `true` operator stops reconciling all resources. + - When `suspend` is `false` or not set, operator reconciles all resources. troubleshoot: type: string enum: @@ -4523,6 +4597,11 @@ spec: description: Age of the resource # Displayed in all priorities jsonPath: .metadata.creationTimestamp + - name: suspend + type: string + description: Suspend reconciliation + # Displayed in all priorities + jsonPath: .spec.suspend subresources: status: {} schema: @@ -4731,6 +4810,38 @@ spec: - "disabled" - "Enabled" - "enabled" + suspend: + type: string + enum: + # List StringBoolXXX constants from model + - "" + - "0" + - "1" + - "False" + - "false" + - "True" + - "true" + - "No" + - "no" + - "Yes" + - "yes" + - "Off" + - "off" + - "On" + - "on" + - "Disable" + - "disable" + - "Enable" + - "enable" + - "Disabled" + - "disabled" + - "Enabled" + - "enabled" + description: | + Suspend reconciliation of resources managed by a ClickHouse Keeper. + Works as the following: + - When `suspend` is `true` operator stops reconciling all keeper resources. + - When `suspend` is `false` or not set, operator reconciles all keeper resources. 
 namespaceDomainPattern:
   type: string
   description: |
diff --git a/pkg/apis/clickhouse-keeper.altinity.com/v1/type_spec.go b/pkg/apis/clickhouse-keeper.altinity.com/v1/type_spec.go
index faa798fe5..7db0c06c4 100644
--- a/pkg/apis/clickhouse-keeper.altinity.com/v1/type_spec.go
+++ b/pkg/apis/clickhouse-keeper.altinity.com/v1/type_spec.go
@@ -23,6 +23,7 @@ import (
 type ChkSpec struct {
 	TaskID                 *types.String       `json:"taskID,omitempty"                 yaml:"taskID,omitempty"`
 	NamespaceDomainPattern *types.String       `json:"namespaceDomainPattern,omitempty" yaml:"namespaceDomainPattern,omitempty"`
+	Suspend                *types.StringBool   `json:"suspend,omitempty"                yaml:"suspend,omitempty"`
 	Reconciling            *apiChi.Reconciling `json:"reconciling,omitempty"            yaml:"reconciling,omitempty"`
 	Defaults               *apiChi.Defaults    `json:"defaults,omitempty"               yaml:"defaults,omitempty"`
 	Configuration          *Configuration      `json:"configuration,omitempty"          yaml:"configuration,omitempty"`
@@ -91,6 +92,9 @@ func (spec *ChkSpec) MergeFrom(from *ChkSpec, _type apiChi.MergeType) {
 		if !spec.NamespaceDomainPattern.HasValue() {
 			spec.NamespaceDomainPattern = spec.NamespaceDomainPattern.MergeFrom(from.NamespaceDomainPattern)
 		}
+		if !spec.Suspend.HasValue() {
+			spec.Suspend = spec.Suspend.MergeFrom(from.Suspend)
+		}
 	case apiChi.MergeTypeOverrideByNonEmptyValues:
 		if from.HasTaskID() {
 			spec.TaskID = spec.TaskID.MergeFrom(from.TaskID)
@@ -98,6 +102,9 @@ func (spec *ChkSpec) MergeFrom(from *ChkSpec, _type apiChi.MergeType) {
 		if from.NamespaceDomainPattern.HasValue() {
 			spec.NamespaceDomainPattern = spec.NamespaceDomainPattern.MergeFrom(from.NamespaceDomainPattern)
 		}
+		if from.Suspend.HasValue() {
+			spec.Suspend = spec.Suspend.MergeFrom(from.Suspend)
+		}
 	}
 
 	spec.Reconciling = spec.Reconciling.MergeFrom(from.Reconciling, _type)
diff --git a/pkg/apis/clickhouse-keeper.altinity.com/v1/zz_generated.deepcopy.go b/pkg/apis/clickhouse-keeper.altinity.com/v1/zz_generated.deepcopy.go
index 576453393..af37abd8d 100644
--- a/pkg/apis/clickhouse-keeper.altinity.com/v1/zz_generated.deepcopy.go
+++ b/pkg/apis/clickhouse-keeper.altinity.com/v1/zz_generated.deepcopy.go
@@ -293,6 +293,11 @@ func (in *ChkSpec) DeepCopyInto(out *ChkSpec) {
 		*out = new(types.String)
 		**out = **in
 	}
+	if in.Suspend != nil {
+		in, out := &in.Suspend, &out.Suspend
+		*out = new(types.StringBool)
+		**out = **in
+	}
 	if in.Reconciling != nil {
 		in, out := &in.Reconciling, &out.Reconciling
 		*out = new(clickhousealtinitycomv1.Reconciling)
diff --git a/pkg/apis/clickhouse.altinity.com/v1/type_spec.go b/pkg/apis/clickhouse.altinity.com/v1/type_spec.go
index 1464d5da5..e76057b08 100644
--- a/pkg/apis/clickhouse.altinity.com/v1/type_spec.go
+++ b/pkg/apis/clickhouse.altinity.com/v1/type_spec.go
@@ -24,6 +24,7 @@ type ChiSpec struct {
 	Stop                   *types.StringBool `json:"stop,omitempty"                   yaml:"stop,omitempty"`
 	Restart                *types.String     `json:"restart,omitempty"                yaml:"restart,omitempty"`
 	Troubleshoot           *types.StringBool `json:"troubleshoot,omitempty"           yaml:"troubleshoot,omitempty"`
+	Suspend                *types.StringBool `json:"suspend,omitempty"                yaml:"suspend,omitempty"`
 	NamespaceDomainPattern *types.String     `json:"namespaceDomainPattern,omitempty" yaml:"namespaceDomainPattern,omitempty"`
 	Templating             *ChiTemplating    `json:"templating,omitempty"             yaml:"templating,omitempty"`
 	Reconciling            *Reconciling      `json:"reconciling,omitempty"            yaml:"reconciling,omitempty"`
@@ -132,6 +133,9 @@ func (spec *ChiSpec) MergeFrom(from *ChiSpec, _type MergeType) {
 		if !spec.NamespaceDomainPattern.HasValue() {
 			spec.NamespaceDomainPattern = 
spec.NamespaceDomainPattern.MergeFrom(from.NamespaceDomainPattern) } + if !spec.Suspend.HasValue() { + spec.Suspend = spec.Suspend.MergeFrom(from.Suspend) + } case MergeTypeOverrideByNonEmptyValues: if from.HasTaskID() { spec.TaskID = spec.TaskID.MergeFrom(from.TaskID) @@ -151,6 +155,10 @@ func (spec *ChiSpec) MergeFrom(from *ChiSpec, _type MergeType) { if from.NamespaceDomainPattern.HasValue() { spec.NamespaceDomainPattern = spec.NamespaceDomainPattern.MergeFrom(from.NamespaceDomainPattern) } + if from.Suspend.HasValue() { + // Override by non-empty values only + spec.Suspend = from.Suspend + } } spec.Templating = spec.Templating.MergeFrom(from.Templating, _type) diff --git a/pkg/apis/clickhouse.altinity.com/v1/zz_generated.deepcopy.go b/pkg/apis/clickhouse.altinity.com/v1/zz_generated.deepcopy.go index 6f7c83ed9..857e7e1fd 100644 --- a/pkg/apis/clickhouse.altinity.com/v1/zz_generated.deepcopy.go +++ b/pkg/apis/clickhouse.altinity.com/v1/zz_generated.deepcopy.go @@ -305,6 +305,11 @@ func (in *ChiSpec) DeepCopyInto(out *ChiSpec) { *out = new(types.StringBool) **out = **in } + if in.Suspend != nil { + in, out := &in.Suspend, &out.Suspend + *out = new(types.StringBool) + **out = **in + } if in.NamespaceDomainPattern != nil { in, out := &in.NamespaceDomainPattern, &out.NamespaceDomainPattern *out = new(types.String) diff --git a/pkg/controller/chi/controller.go b/pkg/controller/chi/controller.go index 1bfaf0cef..f09044f36 100644 --- a/pkg/controller/chi/controller.go +++ b/pkg/controller/chi/controller.go @@ -149,7 +149,7 @@ func (c *Controller) addEventHandlersCHI( chopInformerFactory.Clickhouse().V1().ClickHouseInstallations().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { chi := obj.(*api.ClickHouseInstallation) - if !chop.Config().IsWatchedNamespace(chi.Namespace) { + if !shouldEnqueue(chi) { return } log.V(3).M(chi).Info("chiInformer.AddFunc") @@ -158,7 +158,7 @@ func (c *Controller) addEventHandlersCHI( UpdateFunc: func(old, new interface{}) { oldChi := old.(*api.ClickHouseInstallation) newChi := new.(*api.ClickHouseInstallation) - if !chop.Config().IsWatchedNamespace(newChi.Namespace) { + if !shouldEnqueue(newChi) { return } log.V(3).M(newChi).Info("chiInformer.UpdateFunc") @@ -850,3 +850,17 @@ func (c *Controller) handleObject(obj interface{}) { // Add CHI object into reconcile loop // TODO c.enqueueObject(chi.Namespace, chi.Name, chi) } + +func shouldEnqueue(chi *api.ClickHouseInstallation) bool { + if !chop.Config().IsWatchedNamespace(chi.Namespace) { + return false + } + + // if CR is suspended, should skip reconciliation + if chi.Spec.Suspend.Value() { + log.V(5).M(chi).Info("chiInformer: skip enqueue, CHI suspended") + return false + } + + return true +} diff --git a/pkg/controller/chi/controller_test.go b/pkg/controller/chi/controller_test.go new file mode 100644 index 000000000..e93ed1d4f --- /dev/null +++ b/pkg/controller/chi/controller_test.go @@ -0,0 +1,47 @@ +package chi + +import ( + "testing" + + api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" + "github.com/altinity/clickhouse-operator/pkg/apis/common/types" + "github.com/altinity/clickhouse-operator/pkg/chop" +) + +func init() { + chop.New(nil, nil, "") +} + +func Test_shouldEnqueue(t *testing.T) { + tests := []struct { + name string + chi *api.ClickHouseInstallation + want bool + }{ + { + name: "skips when chi is suspended", + chi: &api.ClickHouseInstallation{ + Spec: api.ChiSpec{ + Suspend: types.NewStringBool(true), + }, + }, + want: 
false, + }, + { + name: "enqueues when chi is not suspended", + chi: &api.ClickHouseInstallation{ + Spec: api.ChiSpec{ + Suspend: types.NewStringBool(false), + }, + }, + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := shouldEnqueue(tt.chi); got != tt.want { + t.Errorf("shouldEnqueue() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/controller/chk/worker-chk-reconciler.go b/pkg/controller/chk/worker-chk-reconciler.go index f438471d8..589103863 100644 --- a/pkg/controller/chk/worker-chk-reconciler.go +++ b/pkg/controller/chk/worker-chk-reconciler.go @@ -43,6 +43,11 @@ func (w *worker) reconcileCR(ctx context.Context, old, new *apiChk.ClickHouseKee return nil } + if new.Spec.Suspend.Value() { + log.V(2).M(new).F().Info("CR is suspended, skip reconcile") + return nil + } + w.a.M(new).S().P() defer w.a.M(new).E().P() From bb84069080676eb4ffbb9313ad1b97330992e224 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Wed, 11 Dec 2024 14:48:51 +0300 Subject: [PATCH 022/161] env: manifests --- ...installations.clickhouse.altinity.com.yaml | 29 ++----------------- ...tiontemplates.clickhouse.altinity.com.yaml | 29 ++----------------- ...ations.clickhouse-keeper.altinity.com.yaml | 29 ++----------------- .../ServiceAccount-clickhouse-operator.yaml | 1 + ...house-operator-install-bundle-v1beta1.yaml | 1 + ...use-operator-install-template-v1beta1.yaml | 1 + 6 files changed, 9 insertions(+), 81 deletions(-) diff --git a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallations.clickhouse.altinity.com.yaml b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallations.clickhouse.altinity.com.yaml index 215526181..2227b3732 100644 --- a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallations.clickhouse.altinity.com.yaml +++ b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallations.clickhouse.altinity.com.yaml @@ -313,38 +313,13 @@ spec: enum: - "" - "RollingUpdate" - suspend: &TypeStringBool - type: string + suspend: + !!merge <<: *TypeStringBool description: | Suspend reconciliation of resources managed by a ClickHouse Installation. Works as the following: - When `suspend` is `true` operator stops reconciling all resources. - When `suspend` is `false` or not set, operator reconciles all resources. - enum: - # List StringBoolXXX constants from model - - "" - - "0" - - "1" - - "False" - - "false" - - "True" - - "true" - - "No" - - "no" - - "Yes" - - "yes" - - "Off" - - "off" - - "On" - - "on" - - "Disable" - - "disable" - - "Enable" - - "enable" - - "Disabled" - - "disabled" - - "Enabled" - - "enabled" troubleshoot: !!merge <<: *TypeStringBool description: | diff --git a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallationtemplates.clickhouse.altinity.com.yaml b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallationtemplates.clickhouse.altinity.com.yaml index 1adc6d434..a8fcaf93d 100644 --- a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallationtemplates.clickhouse.altinity.com.yaml +++ b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallationtemplates.clickhouse.altinity.com.yaml @@ -313,38 +313,13 @@ spec: enum: - "" - "RollingUpdate" - suspend: &TypeStringBool - type: string + suspend: + !!merge <<: *TypeStringBool description: | Suspend reconciliation of resources managed by a ClickHouse Installation. 
Works as the following: - When `suspend` is `true` operator stops reconciling all resources. - When `suspend` is `false` or not set, operator reconciles all resources. - enum: - # List StringBoolXXX constants from model - - "" - - "0" - - "1" - - "False" - - "false" - - "True" - - "true" - - "No" - - "no" - - "Yes" - - "yes" - - "Off" - - "off" - - "On" - - "on" - - "Disable" - - "disable" - - "Enable" - - "enable" - - "Disabled" - - "disabled" - - "Enabled" - - "enabled" troubleshoot: !!merge <<: *TypeStringBool description: | diff --git a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousekeeperinstallations.clickhouse-keeper.altinity.com.yaml b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousekeeperinstallations.clickhouse-keeper.altinity.com.yaml index 8221b2a0a..f8c3d78b4 100644 --- a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousekeeperinstallations.clickhouse-keeper.altinity.com.yaml +++ b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousekeeperinstallations.clickhouse-keeper.altinity.com.yaml @@ -301,38 +301,13 @@ spec: - "disabled" - "Enabled" - "enabled" - suspend: &TypeStringBool - type: string + suspend: + !!merge <<: *TypeStringBool description: | Suspend reconciliation of resources managed by a ClickHouse Keeper. Works as the following: - When `suspend` is `true` operator stops reconciling all keeper resources. - When `suspend` is `false` or not set, operator reconciles all keeper resources. - enum: - # List StringBoolXXX constants from model - - "" - - "0" - - "1" - - "False" - - "false" - - "True" - - "true" - - "No" - - "no" - - "Yes" - - "yes" - - "Off" - - "off" - - "On" - - "on" - - "Disable" - - "disable" - - "Enable" - - "enable" - - "Disabled" - - "disabled" - - "Enabled" - - "enabled" namespaceDomainPattern: type: string description: | diff --git a/deploy/helm/clickhouse-operator/templates/generated/ServiceAccount-clickhouse-operator.yaml b/deploy/helm/clickhouse-operator/templates/generated/ServiceAccount-clickhouse-operator.yaml index 803619f97..3bc8d89af 100644 --- a/deploy/helm/clickhouse-operator/templates/generated/ServiceAccount-clickhouse-operator.yaml +++ b/deploy/helm/clickhouse-operator/templates/generated/ServiceAccount-clickhouse-operator.yaml @@ -13,6 +13,7 @@ metadata: namespace: {{ .Release.Namespace }} labels: {{ include "altinity-clickhouse-operator.labels" . 
| nindent 4 }} annotations: {{ toYaml .Values.serviceAccount.annotations | nindent 4 }} + # Template Parameters: # # NAMESPACE=kube-system diff --git a/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml b/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml index d491ad103..5e15ece95 100644 --- a/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml +++ b/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml @@ -3869,6 +3869,7 @@ metadata: namespace: kube-system labels: clickhouse.altinity.com/chop: 0.24.3 + # Template Parameters: # # NAMESPACE=kube-system diff --git a/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml b/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml index 928d1ad15..8abc631a1 100644 --- a/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml +++ b/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml @@ -3869,6 +3869,7 @@ metadata: namespace: ${OPERATOR_NAMESPACE} labels: clickhouse.altinity.com/chop: 0.24.3 + # Template Parameters: # # NAMESPACE=${OPERATOR_NAMESPACE} From fc71708f6b4b6e00f3ee7b84f96571de4691da5f Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Wed, 11 Dec 2024 15:05:27 +0300 Subject: [PATCH 023/161] dev: cm storage name --- pkg/controller/chi/kube/cr.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/controller/chi/kube/cr.go b/pkg/controller/chi/kube/cr.go index 3cf8d0b2d..60a7e4d26 100644 --- a/pkg/controller/chi/kube/cr.go +++ b/pkg/controller/chi/kube/cr.go @@ -79,7 +79,7 @@ func (c *CR) buildCR(chi *api.ClickHouseInstallation, cm *core.ConfigMap) *api.C } chi.EnsureStatus().NormalizedCR = normalized } - + if len( cm.Data[statusNormalizedCompleted])>0 { normalizedCompleted := &api.ClickHouseInstallation{} if yaml.Unmarshal([]byte(cm.Data[statusNormalizedCompleted]), normalizedCompleted) != nil { @@ -236,10 +236,10 @@ func (c *CR) buildCMNamespace(obj meta.Object) string { } func (c *CR) buildCMName(obj meta.Object) string { - return obj.GetName() + return "chi-storage-" + obj.GetName() } const ( statusNormalized = "status-normalized" statusNormalizedCompleted = "status-normalizedCompleted" -) \ No newline at end of file +) From 18becf8300a29afb25e060b32632c05d80eac780 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Wed, 11 Dec 2024 15:05:45 +0300 Subject: [PATCH 024/161] dev: format --- pkg/controller/chi/kube/cr.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/controller/chi/kube/cr.go b/pkg/controller/chi/kube/cr.go index 60a7e4d26..fbf5d7c32 100644 --- a/pkg/controller/chi/kube/cr.go +++ b/pkg/controller/chi/kube/cr.go @@ -77,10 +77,10 @@ func (c *CR) buildCR(chi *api.ClickHouseInstallation, cm *core.ConfigMap) *api.C if yaml.Unmarshal([]byte(cm.Data[statusNormalized]), normalized) != nil { return chi } - chi.EnsureStatus().NormalizedCR = normalized + chi.EnsureStatus().NormalizedCR = normalized } - if len( cm.Data[statusNormalizedCompleted])>0 { + if len(cm.Data[statusNormalizedCompleted]) > 0 { normalizedCompleted := &api.ClickHouseInstallation{} if yaml.Unmarshal([]byte(cm.Data[statusNormalizedCompleted]), normalizedCompleted) != nil { return chi @@ -240,6 +240,6 @@ func (c *CR) buildCMName(obj meta.Object) string { } const ( -statusNormalized = "status-normalized" -statusNormalizedCompleted = "status-normalizedCompleted" + statusNormalized = "status-normalized" + statusNormalizedCompleted = "status-normalizedCompleted" ) From 8bcc2bcf8236e383c821f29418fae7cca1aa0b62 Mon 
Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Wed, 11 Dec 2024 15:20:50 +0300
Subject: [PATCH 025/161] dev: add stopped check to cluster

---
 pkg/apis/clickhouse.altinity.com/v1/type_cluster.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pkg/apis/clickhouse.altinity.com/v1/type_cluster.go b/pkg/apis/clickhouse.altinity.com/v1/type_cluster.go
index f38686234..dbe8bbb24 100644
--- a/pkg/apis/clickhouse.altinity.com/v1/type_cluster.go
+++ b/pkg/apis/clickhouse.altinity.com/v1/type_cluster.go
@@ -377,6 +377,11 @@ func (cluster *Cluster) IsNonZero() bool {
 return cluster != nil
}
+// IsStopped checks whether the cluster is stopped
+func (cluster *Cluster) IsStopped() bool {
+ return cluster.GetCHI().IsStopped()
+}
+
// ChiClusterLayout defines layout section of .spec.configuration.clusters
type ChiClusterLayout struct {
 ShardsCount int `json:"shardsCount,omitempty" yaml:"shardsCount,omitempty"`

From c6618759dccfd7c7020f409f1f01686c9be5c911 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Wed, 11 Dec 2024 15:21:42 +0300
Subject: [PATCH 026/161] dev: rename

---
 pkg/apis/clickhouse.altinity.com/v1/type_cluster.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pkg/apis/clickhouse.altinity.com/v1/type_cluster.go b/pkg/apis/clickhouse.altinity.com/v1/type_cluster.go
index dbe8bbb24..23cb47712 100644
--- a/pkg/apis/clickhouse.altinity.com/v1/type_cluster.go
+++ b/pkg/apis/clickhouse.altinity.com/v1/type_cluster.go
@@ -219,13 +219,13 @@ func (cluster *Cluster) GetServiceTemplate() (*ServiceTemplate, bool) {
 return cluster.Runtime.CHI.GetServiceTemplate(name)
}
-// GetCHI gets parent CHI
-func (cluster *Cluster) GetCHI() *ClickHouseInstallation {
+// GetCR gets parent CR
+func (cluster *Cluster) GetCR() *ClickHouseInstallation {
 return cluster.Runtime.CHI
}
 func (cluster *Cluster) GetAncestor() ICluster {
- return cluster.GetCHI().GetAncestor().FindCluster(cluster.GetName())
+ return cluster.GetCR().GetAncestor().FindCluster(cluster.GetName())
}
// GetShard gets shard with specified index
@@ -379,7 +379,7 @@ func (cluster *Cluster) IsNonZero() bool {
// IsStopped checks whether the cluster is stopped
 func (cluster *Cluster) IsStopped() bool {
- return cluster.GetCHI().IsStopped()
+ return cluster.GetCR().IsStopped()
}
// ChiClusterLayout defines layout section of .spec.configuration.clusters

From 43b64433b059262c0839ef56428b5391fc0eaa11 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Wed, 11 Dec 2024 15:26:58 +0300
Subject: [PATCH 027/161] dev: do not reconcile ZK path on stopped cluster

---
 pkg/controller/chi/worker-zk-integration.go | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/pkg/controller/chi/worker-zk-integration.go b/pkg/controller/chi/worker-zk-integration.go
index 4541fb938..20f398e7d 100644
--- a/pkg/controller/chi/worker-zk-integration.go
+++ b/pkg/controller/chi/worker-zk-integration.go
@@ -20,7 +20,7 @@ import (
)
 func reconcileZookeeperRootPath(cluster *api.Cluster) {
- if cluster.Zookeeper.IsEmpty() {
+ if !shouldReconcileZookeeperPath(cluster) {
 // Nothing to reconcile
 return
 }
@@ -29,3 +29,16 @@ func reconcileZookeeperRootPath(cluster *api.Cluster) {
 path.Ensure(cluster.Zookeeper.Root)
 path.Close()
}
+
+func shouldReconcileZookeeperPath(cluster *api.Cluster) bool {
+ if cluster.IsStopped() {
+ // Nothing to reconcile
+ return false
+ }
+ if cluster.Zookeeper.IsEmpty() {
+ // Nothing to reconcile
+ return false
+ }
+
+ return true
+}
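Usage note (illustrative sketch, not part of the patches above): the `Suspend` field added in this series pauses all reconciliation for a CR, while the pre-existing `stop` field stops the ClickHouse installation itself; with patch 027, a stopped cluster also skips ZooKeeper root path reconciliation. A minimal hypothetical manifest — the name and layout here are placeholders:

apiVersion: "clickhouse.altinity.com/v1"
kind: "ClickHouseInstallation"
metadata:
  name: suspend-example
spec:
  suspend: "yes"   # operator stops reconciling all resources while this is set
  configuration:
    clusters:
      - name: default
        layout:
          replicasCount: 1

Setting `suspend` back to "no" (or removing it) resumes reconciliation; test_054 later in this series exercises exactly this flow via `kubectl patch`.

From b5acfcd8f17e20419ebd92caa9d10c444974a947 Mon Sep 17 00:00:00 2001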
From: Eugene Klimov Date: Thu, 12 Dec 2024 17:41:57 +0400 Subject: [PATCH 028/161] add kafka related alerts to clickhouse (#1596) --- .../prometheus-alert-rules-clickhouse.yaml | 77 ++++++++++++++++++- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/deploy/prometheus/prometheus-alert-rules-clickhouse.yaml b/deploy/prometheus/prometheus-alert-rules-clickhouse.yaml index 700faccbe..d78349e30 100644 --- a/deploy/prometheus/prometheus-alert-rules-clickhouse.yaml +++ b/deploy/prometheus/prometheus-alert-rules-clickhouse.yaml @@ -524,10 +524,83 @@ spec: identifier: "{{ $labels.hostname }}" summary: "Background Message Broker Schedule pool utilised high" description: |- - chi_clickhouse_metric_BackgroundMessageBrokerSchedulePoolTask = {{ with printf "chi_clickhouse_metric_BackgroundMessageBrokerSchedulePoolTask{tenant='%s',chi='%s',hostname='%s'}" .Labels.tenant .Labels.chi .Labels.hostname | query }}{{ . | first | value | printf "%.0f" }}{{ end }} - chi_clickhouse_metric_BackgroundMessageBrokerSchedulePoolSize = {{ with printf "chi_clickhouse_metric_BackgroundMessageBrokerSchedulePoolSize{tenant='%s',chi='%s',hostname='%s'}" .Labels.tenant .Labels.chi .Labels.hostname | query }}{{ . | first | value | printf "%.0f" }}{{ end }} + chi_clickhouse_metric_BackgroundMessageBrokerSchedulePoolTask = {{ with printf "chi_clickhouse_metric_BackgroundMessageBrokerSchedulePoolTask{exported_namespace='%s',chi='%s',hostname='%s'}" .Labels.exported_namespace .Labels.chi .Labels.hostname | query }}{{ . | first | value | printf "%.0f" }}{{ end }} + chi_clickhouse_metric_BackgroundMessageBrokerSchedulePoolSize = {{ with printf "chi_clickhouse_metric_BackgroundMessageBrokerSchedulePoolSize{exported_namespace='%s',chi='%s',hostname='%s'}" .Labels.exported_namespace .Labels.chi .Labels.hostname | query }}{{ . | first | value | printf "%.0f" }}{{ end }} - https://kb.altinity.com/altinity-kb-integrations/altinity-kb-kafka/background_message_broker_schedule_pool_size/ - https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings#background_message_broker_schedule_pool_size - https://clickhouse.com/docs/en/operations/system-tables/metrics#backgroundmessagebrokerschedulepoolsize This pool is used for tasks related to message streaming from Apache Kafka or other message brokers. You need to increase `background_message_broker_schedule_pool_size` to fix the problem. + + - alert: ClickHouseKafkaRebalanceAssignment + expr: increase(chi_clickhouse_event_KafkaRebalanceAssignments[5m]) > 600 + for: 10m + labels: + severity: high + team: ClickHouse + annotations: + identifier: "{{ $labels.hostname }}" + summary: "Kafka re-balance is too high" + description: |- + increase(chi_clickhouse_event_KafkaRebalanceAssignments[5m]) = {{ with printf "increase(chi_clickhouse_event_KafkaRebalanceAssignments{exported_namespace='%s',chi='%s',hostname='%s'}[5m])" .Labels.exported_namespace .Labels.chi .Labels.hostname | query }}{{ . | first | value | printf "%.0f" }}{{ end }} + Kafka partition re-balance happens too often + Check Kafka logs to find root cause for partition re-balance + + - alert: ClickHouseKafkaCommitFailures + expr: increase(chi_clickhouse_event_KafkaCommitFailures[2h]) > 0 + for: 10m + labels: + severity: high + team: ClickHouse + annotations: + description: |- + Commits from Kafka tables failed {{ with printf "increase(chi_clickhouse_event_KafkaCommitFailures{exported_namespace='%s',chi='%s',hostname='%s'}[2h])" .Labels.exported_namespace .Labels.chi .Labels.hostname | query }}{{ . 
| first | value | printf "%.0f" }} times in the last 2 hours. {{ end }}
+
+          Potential Causes:
+          - Very slow materialized view(s) that cannot flush data within `max.poll.interval.ms`.
+          - Kafka-side issue or connectivity issue with the Kafka cluster.
+
+          Suggested Actions:
+          1. Check the `system.kafka_consumers` table for consumer states.
+          2. Review ClickHouse logs for messages containing `rdk`.
+          3. For slow materialized views:
+             - Try to improve the speed of Materialized View (MV) flushing.
+             - Decrease `kafka_max_block_size` (this will not help if the slowness is caused by a JOIN with a big table in the MV).
+             - Increase `max.poll.interval.ms` from its default of `600000` (Note: Increasing this may delay detection of malfunctioning consumers).
+
+      - alert: ClickHouseKafkaConsumerErrors
+        expr: increase(chi_clickhouse_event_KafkaConsumerErrors{}[2h]) > 0
+        for: 10m
+        labels:
+          severity: high
+          team: ClickHouse
+        annotations:
+          description: |-
+            Kafka consumer reported errors {{ with printf "increase(chi_clickhouse_event_KafkaConsumerErrors{exported_namespace='%s',chi='%s',hostname='%s'}[2h])" .Labels.exported_namespace .Labels.chi .Labels.hostname | query }}{{ . | first | value | printf "%.0f" }} {{ end }} times in the last 2 hours.
+
+            Suggested Actions:
+            1. Check the `system.kafka_consumers` table to verify the consumer states and troubleshoot.
+            2. Review ClickHouse logs for messages containing `rdk`. Errors in Kafka consumers are often related to connectivity, timeout, or other configuration issues.
+
+            See https://kb.altinity.com/altinity-kb-integrations/altinity-kb-kafka/error-handling/
+
+
+      - alert: ClickHouseKafkaRebalanceRevocations
+        expr: increase(chi_clickhouse_event_KafkaRebalanceRevocations[2h]) > chi_clickhouse_metric_KafkaConsumers * 100
+        for: 10m
+        labels:
+          severity: high
+          team: ClickHouse
+        annotations:
+          description: |-
+            Kafka re-balance revocations exceeded the expected threshold.
+            `increase(chi_clickhouse_event_KafkaRebalanceRevocations[2h])` = {{ with printf "increase(chi_clickhouse_event_KafkaRebalanceRevocations{exported_namespace='%s',chi='%s',hostname='%s'}[2h])" .Labels.exported_namespace .Labels.chi .Labels.hostname | query }}{{ . | first | value | printf "%.0f" }}{{ end }} revocations,
+            compared to `chi_clickhouse_metric_KafkaConsumers` * 100 = {{ with printf "chi_clickhouse_metric_KafkaConsumers{exported_namespace='%s',chi='%s',hostname='%s'} * 100" .Labels.exported_namespace .Labels.chi .Labels.hostname | query }}{{ . | first | value | printf "%.0f" }}{{ end }}.
+
+            Potential Causes:
+            - Excessive Kafka re-balance activity may indicate instability in the Kafka cluster, misconfigured partitions, or high consumer group churn.
+            - High consumer lag or network interruptions.
+
+            Suggested Actions:
+            1. **Check the `system.kafka_consumers` table** for consumer lag, partition assignments, and state.
+            2. **Review ClickHouse logs for related errors**, especially `rdk` messages. 
\ No newline at end of file

From 0eb749002a87c85631f29e44442f6734f5314669 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 13 Dec 2024 12:18:33 +0300
Subject: [PATCH 029/161] dev: attempt to unify marshallers

---
 .../v1/type_settings.go | 31 ++++++++++++++++---
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/pkg/apis/clickhouse.altinity.com/v1/type_settings.go b/pkg/apis/clickhouse.altinity.com/v1/type_settings.go
index 8e7f2e6e4..5e19b1b86 100644
--- a/pkg/apis/clickhouse.altinity.com/v1/type_settings.go
+++ b/pkg/apis/clickhouse.altinity.com/v1/type_settings.go
@@ -23,6 +23,7 @@ import (
 "strings"
 "gopkg.in/d4l3k/messagediff.v1"
+ "gopkg.in/yaml.v3"
 "github.com/altinity/clickhouse-operator/pkg/apis/common/types"
 "github.com/altinity/clickhouse-operator/pkg/util"
@@ -310,6 +311,26 @@ func (s *Settings) Groups() []string {
// UnmarshalJSON unmarshal JSON
 func (s *Settings) UnmarshalJSON(data []byte) error {
+ return s.unmarshal(data, json.Unmarshal)
+}
+
+// MarshalJSON marshals JSON
+func (s *Settings) MarshalJSON() ([]byte, error) {
+ return s.marshal(json.Marshal)
+}
+
+// UnmarshalYAML unmarshal YAML
+func (s *Settings) UnmarshalYAML(data []byte) error {
+ return s.unmarshal(data, yaml.Unmarshal)
+}
+
+// MarshalYAML marshals YAML
+func (s *Settings) MarshalYAML() ([]byte, error) {
+ return s.marshal(yaml.Marshal)
+}
+
+// unmarshal
+func (s *Settings) unmarshal(data []byte, unmarshaller func(data []byte, v any) error) error {
 if s == nil {
 return fmt.Errorf("unable to unmashal with nil")
 }
@@ -319,7 +340,7 @@ func (s *Settings) UnmarshalJSON(data []byte) error {
 var untypedMap untypedMapType
 // Provided binary data is expected to unmarshal into untyped map, because settings are map-like struct
- if err := json.Unmarshal(data, &untypedMap); err != nil {
+ if err := unmarshaller(data, &untypedMap); err != nil {
 return err
 }
@@ -353,10 +374,10 @@ func (s *Settings) UnmarshalJSON(data []byte) error {
 return nil
}
-// MarshalJSON marshals JSON
-func (s *Settings) MarshalJSON() ([]byte, error) {
+// marshal
+func (s *Settings) marshal(marshaller func (v any) ([]byte, error)) ([]byte, error) {
 if s == nil {
- return json.Marshal(nil)
+ return marshaller(nil)
 }
 raw := make(map[string]interface{})
@@ -364,7 +385,7 @@ func (s *Settings) MarshalJSON() ([]byte, error) {
 raw[key] = setting.AsAny()
 })
- return json.Marshal(raw)
+ return marshaller(raw)
}
// fetchPort is the base function to fetch *Int32 port value

From aa865f87eeade34a54d96cc502f72163d9b78aba Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 13 Dec 2024 12:19:20 +0300
Subject: [PATCH 030/161] dev: restore status from external storage

---
 pkg/controller/chi/worker-chi-reconciler.go | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go
index 17cc84447..a54855e8b 100644
--- a/pkg/controller/chi/worker-chi-reconciler.go
+++ b/pkg/controller/chi/worker-chi-reconciler.go
@@ -53,6 +53,14 @@ func (w *worker) reconcileCR(ctx context.Context, old, new *api.ClickHouseInstal
 return nil
 }
+ if new != nil {
+ n, err := w.c.kube.CR().Get(ctx, new.GetNamespace(), new.GetName())
+ if err != nil {
+ return err
+ }
+ new = n.(*api.ClickHouseInstallation)
+ }
+
 w.a.M(new).S().P()
 defer w.a.M(new).E().P()

From 4b764384979e3f579988def331a1016a3026dd69 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 13 Dec 2024 12:19:55 +0300
Subject: [PATCH 031/161] dev: switch external status to json

---
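For reference (a rough sketch, not part of this patch): after patch 023 the operator keeps normalized CR snapshots in a per-CHI ConfigMap named "chi-storage-" + CHI name, and this change switches the serialization of its two data keys from YAML to JSON. With contents abbreviated and names hypothetical, the stored object would look roughly like:

apiVersion: v1
kind: ConfigMap
metadata:
  name: chi-storage-my-chi        # buildCMName(): "chi-storage-" + obj.GetName()
  namespace: my-namespace
data:
  status-normalized: |            # statusNormalized key
    {"kind":"ClickHouseInstallation","spec":{...}}
  status-normalizedCompleted: |   # statusNormalizedCompleted key
    {"kind":"ClickHouseInstallation","spec":{...}}

Patch 047 later in this series adds a test helper that reads status-normalizedCompleted from exactly this ConfigMap.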
 pkg/controller/chi/kube/cr.go | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/pkg/controller/chi/kube/cr.go b/pkg/controller/chi/kube/cr.go
index fbf5d7c32..2732d743e 100644
--- a/pkg/controller/chi/kube/cr.go
+++ b/pkg/controller/chi/kube/cr.go
@@ -16,11 +16,10 @@ package kube
 import (
 "context"
+ "encoding/json"
 "fmt"
 "time"
- "gopkg.in/yaml.v3"
-
 core "k8s.io/api/core/v1"
 apiErrors "k8s.io/apimachinery/pkg/api/errors"
 meta "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -74,7 +73,7 @@ func (c *CR) buildCR(chi *api.ClickHouseInstallation, cm *core.ConfigMap) *api.C
 if len(cm.Data[statusNormalized]) > 0 {
 normalized := &api.ClickHouseInstallation{}
- if yaml.Unmarshal([]byte(cm.Data[statusNormalized]), normalized) != nil {
+ if json.Unmarshal([]byte(cm.Data[statusNormalized]), normalized) != nil {
 return chi
 }
 chi.EnsureStatus().NormalizedCR = normalized
@@ -82,7 +81,7 @@ func (c *CR) buildCR(chi *api.ClickHouseInstallation, cm *core.ConfigMap) *api.C
 if len(cm.Data[statusNormalizedCompleted]) > 0 {
 normalizedCompleted := &api.ClickHouseInstallation{}
- if yaml.Unmarshal([]byte(cm.Data[statusNormalizedCompleted]), normalizedCompleted) != nil {
+ if json.Unmarshal([]byte(cm.Data[statusNormalizedCompleted]), normalizedCompleted) != nil {
 return chi
 }
 chi.EnsureStatus().NormalizedCRCompleted = normalizedCompleted
@@ -194,10 +193,10 @@ func (c *CR) statusUpdate(ctx context.Context, chi *api.ClickHouseInstallation)
 func (c *CR) buildResources(chi *api.ClickHouseInstallation) (*api.ClickHouseInstallation, *core.ConfigMap) {
 var normalized, normalizedCompleted []byte
 if chi.Status.NormalizedCR != nil {
- normalized, _ = yaml.Marshal(chi.Status.NormalizedCR)
+ normalized, _ = json.Marshal(chi.Status.NormalizedCR)
 }
 if chi.Status.NormalizedCRCompleted != nil {
- normalizedCompleted, _ = yaml.Marshal(chi.Status.NormalizedCRCompleted)
+ normalizedCompleted, _ = json.Marshal(chi.Status.NormalizedCRCompleted)
 }
 cm := &core.ConfigMap{
 ObjectMeta: meta.ObjectMeta{

From 99a5aabd66fc8520a9e599bbd581450dc3a4b2c3 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 13 Dec 2024 12:20:12 +0300
Subject: [PATCH 032/161] dev: extra logger

---
 pkg/model/chop_config.go | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pkg/model/chop_config.go b/pkg/model/chop_config.go
index 1e9d490a8..65a66ed28 100644
--- a/pkg/model/chop_config.go
+++ b/pkg/model/chop_config.go
@@ -20,6 +20,7 @@ import (
 "gopkg.in/d4l3k/messagediff.v1"
 api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1"
+ log "github.com/altinity/clickhouse-operator/pkg/announcer"
 "github.com/altinity/clickhouse-operator/pkg/chop"
)
@@ -157,6 +158,7 @@ func IsConfigurationChangeRequiresReboot(host *api.Host) bool {
 }
 new = host.GetZookeeper()
 if isZookeeperChangeRequiresReboot(host, old, new) {
+ log.Info("ChangeRequiresReboot isZookeeperChangeRequiresReboot: true")
 return true
 }
 }
@@ -170,6 +172,7 @@ func IsConfigurationChangeRequiresReboot(host *api.Host) bool {
 new = host.GetCR().GetSpec().GetConfiguration().GetProfiles()
 }
 if isSettingsChangeRequiresReboot(host, configurationRestartPolicyRulesSectionProfiles, old, new) {
+ log.Info("ChangeRequiresReboot isSettingsChangeRequiresReboot(profiles global): true")
 return true
 }
 }
@@ -183,6 +186,7 @@ func IsConfigurationChangeRequiresReboot(host *api.Host) bool {
 new = host.GetCR().GetSpec().GetConfiguration().GetQuotas()
 }
 if isSettingsChangeRequiresReboot(host, configurationRestartPolicyRulesSectionQuotas, old, new) {
+ 
log.Info("ChangeRequiresReboot isSettingsChangeRequiresReboot(quotas global): true") return true } } @@ -196,6 +200,7 @@ func IsConfigurationChangeRequiresReboot(host *api.Host) bool { new = host.GetCR().GetSpec().GetConfiguration().GetSettings() } if isSettingsChangeRequiresReboot(host, configurationRestartPolicyRulesSectionSettings, old, new) { + log.Info("ChangeRequiresReboot isSettingsChangeRequiresReboot(settings global): true") return true } } @@ -207,6 +212,7 @@ func IsConfigurationChangeRequiresReboot(host *api.Host) bool { } new = host.Settings if isSettingsChangeRequiresReboot(host, configurationRestartPolicyRulesSectionSettings, old, new) { + log.Info("ChangeRequiresReboot isSettingsChangeRequiresReboot(settings local): true") return true } } @@ -228,6 +234,7 @@ func IsConfigurationChangeRequiresReboot(host *api.Host) bool { ) } if isSettingsChangeRequiresReboot(host, configurationRestartPolicyRulesSectionFiles, old, new) { + log.Info("ChangeRequiresReboot isSettingsChangeRequiresReboot(files global): true") return true } } @@ -247,6 +254,7 @@ func IsConfigurationChangeRequiresReboot(host *api.Host) bool { true, ) if isSettingsChangeRequiresReboot(host, configurationRestartPolicyRulesSectionFiles, old, new) { + log.Info("ChangeRequiresReboot isSettingsChangeRequiresReboot(files local): true") return true } } From 73ac3d2650262c12da15dc9cdb99bc1515f44885 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 13 Dec 2024 12:20:40 +0300 Subject: [PATCH 033/161] format --- tests/e2e/test_operator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index fdc9ec6f5..87f66a8b1 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -1893,6 +1893,7 @@ def test_016(self): with And("ClickHouse SHOULD NOT be restarted"): new_start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.startTime") assert start_time == new_start_time + assert start_time == new_start_time # test-016-settings-03.yaml with When("Update macro and dictionary settings"): From 38f13be1981b9ae768485d447e4efed712025c37 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 13 Dec 2024 12:20:59 +0300 Subject: [PATCH 034/161] dev: formetter --- pkg/apis/clickhouse.altinity.com/v1/type_settings.go | 2 +- pkg/model/chop_config.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/apis/clickhouse.altinity.com/v1/type_settings.go b/pkg/apis/clickhouse.altinity.com/v1/type_settings.go index 5e19b1b86..a3cbaf0b2 100644 --- a/pkg/apis/clickhouse.altinity.com/v1/type_settings.go +++ b/pkg/apis/clickhouse.altinity.com/v1/type_settings.go @@ -375,7 +375,7 @@ func (s *Settings) unmarshal(data []byte, unmarshaller func(data []byte, v any) } // marshal -func (s *Settings) marshal(marshaller func (v any) ([]byte, error)) ([]byte, error) { +func (s *Settings) marshal(marshaller func(v any) ([]byte, error)) ([]byte, error) { if s == nil { return marshaller(nil) } diff --git a/pkg/model/chop_config.go b/pkg/model/chop_config.go index 65a66ed28..fee656ad7 100644 --- a/pkg/model/chop_config.go +++ b/pkg/model/chop_config.go @@ -19,8 +19,8 @@ import ( "gopkg.in/d4l3k/messagediff.v1" - api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" log "github.com/altinity/clickhouse-operator/pkg/announcer" + api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" "github.com/altinity/clickhouse-operator/pkg/chop" ) From 830fec15c2b607567aaa6fc89ba8f8b5227d319b Mon Sep 17 
00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 13 Dec 2024 12:39:06 +0300
Subject: [PATCH 035/161] test: format

---
 tests/e2e/steps.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/e2e/steps.py b/tests/e2e/steps.py
index 6a88fc008..ae4f9bebf 100644
--- a/tests/e2e/steps.py
+++ b/tests/e2e/steps.py
@@ -152,9 +152,9 @@ def check_metrics_monitoring(
 self,
 operator_namespace,
 operator_pod,
- expect_pattern = "",
- expect_metric = "",
- expect_labels = "",
+ expect_pattern="",
+ expect_metric="",
+ expect_labels="",
 container="metrics-exporter",
 port="8888",
 max_retries=7

From 05975d1c5eccbfbb5c21e4130e9155925627a127 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 13 Dec 2024 12:39:34 +0300
Subject: [PATCH 036/161] tests: test 34 is not correct - add to regression

---
 tests/regression.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/regression.py b/tests/regression.py
index b4fa3bb70..0dd524448 100755
--- a/tests/regression.py
+++ b/tests/regression.py
@@ -10,6 +10,7 @@
 "/regression/e2e.test_operator/test_008*": [(Fail, "Test 008 sometimes fails due to unknown reasons")],
 "/regression/e2e.test_operator/test_014*": [(Fail, "Test 014 sometimes fails due to unknown reasons")],
 "/regression/e2e.test_operator/test_032*": [(Fail, "Test 032 sometimes fails due to unknown reasons")],
+ "/regression/e2e.test_operator/test_034*": [(Fail, "Test 034 is not completed yet, it should run reconcile before checking metrics errors availability")],
 # test_clickhouse.py
 "/regression/e2e.test_clickhouse/test_ch_001*": [(Fail, "Insert Quorum test need to refactoring")],
 # test_metrics_alerts.py

From cfa4f53b99f648aecc460e77ce67a5f8dfab5e11 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 13 Dec 2024 12:49:38 +0300
Subject: [PATCH 037/161] test: 
regression --- tests/regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/regression.py b/tests/regression.py index 0dd524448..ad3e8f0bb 100755 --- a/tests/regression.py +++ b/tests/regression.py @@ -10,7 +10,7 @@ "/regression/e2e.test_operator/test_008*": [(Fail, "Test 008 sometimes fails due to unknown reasons")], "/regression/e2e.test_operator/test_014*": [(Fail, "Test 014 sometimes fails due to unknown reasons")], "/regression/e2e.test_operator/test_032*": [(Fail, "Test 032 sometimes fails due to unknown reasons")], - "/regression/e2e.test_operator/test_034*": [(Fail, "Test 034 is not completed yet, it should run reconcile before checking metrics errors availability")], + "/regression/e2e.test_operator/test_034*": [(Fail, "Test 034 is incorrect, for metrics errors reconcile required")], # test_clickhouse.py "/regression/e2e.test_clickhouse/test_ch_001*": [(Fail, "Insert Quorum test need to refactoring")], # test_metrics_alerts.py From 92ee931625ed02eb84a662ffd8cc6cb66186474c Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 13 Dec 2024 12:52:37 +0300 Subject: [PATCH 038/161] test: msg --- tests/regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/regression.py b/tests/regression.py index ad3e8f0bb..f9c181b45 100755 --- a/tests/regression.py +++ b/tests/regression.py @@ -10,7 +10,7 @@ "/regression/e2e.test_operator/test_008*": [(Fail, "Test 008 sometimes fails due to unknown reasons")], "/regression/e2e.test_operator/test_014*": [(Fail, "Test 014 sometimes fails due to unknown reasons")], "/regression/e2e.test_operator/test_032*": [(Fail, "Test 032 sometimes fails due to unknown reasons")], - "/regression/e2e.test_operator/test_034*": [(Fail, "Test 034 is incorrect, for metrics errors reconcile required")], + "/regression/e2e.test_operator/test_034*": [(Fail, "Test 034 is incorrect - metrics errors require reconcile")], # test_clickhouse.py "/regression/e2e.test_clickhouse/test_ch_001*": [(Fail, "Insert Quorum test need to refactoring")], # test_metrics_alerts.py From 39acd22c020c6c9d1a5df321c1bb8dc1d14d43b3 Mon Sep 17 00:00:00 2001 From: alz Date: Fri, 13 Dec 2024 20:17:22 +0300 Subject: [PATCH 039/161] Always create a random namespace to prevent cascade test failures --- tests/e2e/steps.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/tests/e2e/steps.py b/tests/e2e/steps.py index ae4f9bebf..db29e9159 100644 --- a/tests/e2e/steps.py +++ b/tests/e2e/steps.py @@ -31,18 +31,16 @@ def get_shell(self, timeout=600): def create_test_namespace(self, force=False): """Create unique test namespace for test.""" - if (self.cflags & PARALLEL) and not force: - self.context.test_namespace = self.name[self.name.find('test_0'):self.name.find('. ')].replace("_", "-") + "-" + str(uuid.uuid1()) - self.context.operator_namespace = self.context.test_namespace - util.create_namespace(self.context.test_namespace) - util.install_operator_if_not_exist() - return self.context.test_namespace - else: - self.context.operator_namespace = self.context.test_namespace - util.create_namespace(self.context.test_namespace) - util.install_operator_if_not_exist() - return self.context.test_namespace + random_namespace = self.name[self.name.find('test_0'):self.name.find('. 
')].replace("_", "-") + "-" + str(uuid.uuid1()) + if not force: # (self.cflags & PARALLEL) and not force: + self.context.test_namespace = random_namespace + + self.context.operator_namespace = self.context.test_namespace + util.create_namespace(self.context.test_namespace) + util.install_operator_if_not_exist() + + return self.context.test_namespace @TestStep(Finally) def delete_test_namespace(self): From f37b75c90161178d379fcfb3e07de815e4be9351 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sat, 14 Dec 2024 01:26:08 +0300 Subject: [PATCH 040/161] test: allow 47 to flip --- tests/regression.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/regression.py b/tests/regression.py index f9c181b45..f19ddcf7e 100755 --- a/tests/regression.py +++ b/tests/regression.py @@ -11,6 +11,7 @@ "/regression/e2e.test_operator/test_014*": [(Fail, "Test 014 sometimes fails due to unknown reasons")], "/regression/e2e.test_operator/test_032*": [(Fail, "Test 032 sometimes fails due to unknown reasons")], "/regression/e2e.test_operator/test_034*": [(Fail, "Test 034 is incorrect - metrics errors require reconcile")], + "/regression/e2e.test_operator/test_047*": [(Fail, "Test 047 sometimes fails due to unknown reasons")], # test_clickhouse.py "/regression/e2e.test_clickhouse/test_ch_001*": [(Fail, "Insert Quorum test need to refactoring")], # test_metrics_alerts.py From 1715e4b94a449bd3df6c944df6b92a5ae294ae9f Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sun, 15 Dec 2024 12:42:44 +0300 Subject: [PATCH 041/161] regression --- tests/regression.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/regression.py b/tests/regression.py index f19ddcf7e..a2cfb5c7b 100755 --- a/tests/regression.py +++ b/tests/regression.py @@ -11,7 +11,9 @@ "/regression/e2e.test_operator/test_014*": [(Fail, "Test 014 sometimes fails due to unknown reasons")], "/regression/e2e.test_operator/test_032*": [(Fail, "Test 032 sometimes fails due to unknown reasons")], "/regression/e2e.test_operator/test_034*": [(Fail, "Test 034 is incorrect - metrics errors require reconcile")], + "/regression/e2e.test_operator/test_036*": [(Fail, "Test 036 sometimes fails due to unknown reasons")], "/regression/e2e.test_operator/test_047*": [(Fail, "Test 047 sometimes fails due to unknown reasons")], + "/regression/e2e.test_operator/test_049*": [(Fail, "Test 049 sometimes fails due to unknown reasons")], # test_clickhouse.py "/regression/e2e.test_clickhouse/test_ch_001*": [(Fail, "Insert Quorum test need to refactoring")], # test_metrics_alerts.py From 479ae03656ece9f738d186e6ca51480e776e4bc3 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sun, 15 Dec 2024 12:42:58 +0300 Subject: [PATCH 042/161] fix typo --- tests/e2e/test_operator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index 87f66a8b1..5f269cfc0 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -5085,7 +5085,7 @@ def check_replication(chi, replicas, token, table = ''): assert out == f"{token}", error() @TestScenario -@Name("test_053. Check that stadnard Kubernetes annotations are ignored if set to statefulset externally") +@Name("test_053. 
Check that standard Kubernetes annotations are ignored if set to StatefulSet externally")
 @Tags("NO_PARALLEL")
 def test_053(self):
 version_from = "0.23.7"
@@ -5121,6 +5121,7 @@ def test_053(self):
 assert kubectl.get_field("statefulset", sts, ".spec.template.metadata.annotations.kubectl\.kubernetes\.io/restartedAt") != ""
 start_time = kubectl.get_field("pod", pod, ".status.startTime")
+
 def check_restart():
 with Then("ClickHouse pods should not be restarted during operator's restart"):
 new_start_time = kubectl.get_field("pod", pod, ".status.startTime")
@@ -5152,7 +5153,6 @@ def check_restart():
 check_restart()
-
 with Finally("I clean up"):
 delete_test_namespace()

From f00ab73c7b7922582b0f49f0d86b7584aab1467c Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Sun, 15 Dec 2024 12:43:43 +0300
Subject: [PATCH 043/161] dev: no need to place restore in reconciler

---
 pkg/controller/chi/worker-chi-reconciler.go | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go
index a54855e8b..17cc84447 100644
--- a/pkg/controller/chi/worker-chi-reconciler.go
+++ b/pkg/controller/chi/worker-chi-reconciler.go
@@ -53,14 +53,6 @@ func (w *worker) reconcileCR(ctx context.Context, old, new *api.ClickHouseInstal
 return nil
 }
- if new != nil {
- n, err := w.c.kube.CR().Get(ctx, new.GetNamespace(), new.GetName())
- if err != nil {
- return err
- }
- new = n.(*api.ClickHouseInstallation)
- }
-
 w.a.M(new).S().P()
 defer w.a.M(new).E().P()

From 93ef1515ea68be9e289af225f2c2f328a9366a0c Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Sun, 15 Dec 2024 12:44:06 +0300
Subject: [PATCH 044/161] dev: restore cr state before reconcile

---
 pkg/controller/chi/worker.go | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pkg/controller/chi/worker.go b/pkg/controller/chi/worker.go
index 438daa81b..e66ee7dfe 100644
--- a/pkg/controller/chi/worker.go
+++ b/pkg/controller/chi/worker.go
@@ -327,6 +327,14 @@ func (w *worker) updateCHI(ctx context.Context, old, new *api.ClickHouseInstalla
 return nil
 }
+ if new != nil {
+ n, err := w.c.kube.CR().Get(ctx, new.GetNamespace(), new.GetName())
+ if err != nil {
+ return err
+ }
+ new = n.(*api.ClickHouseInstallation)
+ }
+
 if w.deleteCHI(ctx, old, new) {
 // CHI is being deleted
 return nil

From 134c3038ed4c58d5cc1bc615290641eaf3ea2892 Mon Sep 17 00:00:00 2001
From: alz
Date: Mon, 16 Dec 2024 12:01:19 +0300
Subject: [PATCH 045/161] Remove x-fails that are not x-fails anymore

---
 tests/regression.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/tests/regression.py b/tests/regression.py
index a2cfb5c7b..65358db68 100755
--- a/tests/regression.py
+++ b/tests/regression.py
@@ -7,13 +7,7 @@
 xfails = {
 # test_operator.py
- 
"/regression/e2e.test_operator/test_052*": [(Fail, "Keeper scale-up/scale-down is flaky")], # test_clickhouse.py "/regression/e2e.test_clickhouse/test_ch_001*": [(Fail, "Insert Quorum test need to refactoring")], # test_metrics_alerts.py From 93b2eb585c241403e78a8aca9f4086b15f511e2a Mon Sep 17 00:00:00 2001 From: alz Date: Tue, 17 Dec 2024 13:07:28 +0300 Subject: [PATCH 046/161] Test for 'suspend' attribute --- .../manifests/chi/test-006-ch-upgrade-1.yaml | 2 +- .../manifests/chi/test-006-ch-upgrade-2.yaml | 2 +- .../manifests/chi/test-006-ch-upgrade-3.yaml | 2 +- tests/e2e/test_operator.py | 60 +++++++++++++++++-- 4 files changed, 57 insertions(+), 9 deletions(-) diff --git a/tests/e2e/manifests/chi/test-006-ch-upgrade-1.yaml b/tests/e2e/manifests/chi/test-006-ch-upgrade-1.yaml index 288d048c8..8bc4e4382 100644 --- a/tests/e2e/manifests/chi/test-006-ch-upgrade-1.yaml +++ b/tests/e2e/manifests/chi/test-006-ch-upgrade-1.yaml @@ -17,4 +17,4 @@ spec: clusters: - name: default layout: - replicasCount: 2 + replicasCount: 1 diff --git a/tests/e2e/manifests/chi/test-006-ch-upgrade-2.yaml b/tests/e2e/manifests/chi/test-006-ch-upgrade-2.yaml index 93e42d0c5..96e45239d 100644 --- a/tests/e2e/manifests/chi/test-006-ch-upgrade-2.yaml +++ b/tests/e2e/manifests/chi/test-006-ch-upgrade-2.yaml @@ -17,4 +17,4 @@ spec: clusters: - name: default layout: - replicasCount: 2 + replicasCount: 1 diff --git a/tests/e2e/manifests/chi/test-006-ch-upgrade-3.yaml b/tests/e2e/manifests/chi/test-006-ch-upgrade-3.yaml index 042318b0a..32fd69562 100644 --- a/tests/e2e/manifests/chi/test-006-ch-upgrade-3.yaml +++ b/tests/e2e/manifests/chi/test-006-ch-upgrade-3.yaml @@ -17,4 +17,4 @@ spec: clusters: - name: default layout: - replicasCount: 2 + replicasCount: 1 diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index 5f269cfc0..cb6241401 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -148,29 +148,32 @@ def test_006(self): old_version = "clickhouse/clickhouse-server:23.8" new_version = "clickhouse/clickhouse-server:24.3" - with Then("Create initial position"): + chi = "test-006" + + with Then(f"Start CHI with version {old_version}"): kubectl.create_and_check( manifest="manifests/chi/test-006-ch-upgrade-1.yaml", check={ - "pod_count": 2, + "pod_count": 1, "pod_image": old_version, "do_not_delete": 1, }, ) - with Then("Use different podTemplate and confirm that pod image is updated"): + with Then(f"Use different podTemplate and confirm that pod image is updated to {new_version}"): kubectl.create_and_check( manifest="manifests/chi/test-006-ch-upgrade-2.yaml", check={ - "pod_count": 2, + "pod_count": 1, "pod_image": new_version, "do_not_delete": 1, }, ) - with Then("Change image in podTemplate itself and confirm that pod image is updated"): + + with Then(f"Change image in podTemplate itself and confirm that pod image is updated back to {old_version}"): kubectl.create_and_check( manifest="manifests/chi/test-006-ch-upgrade-3.yaml", check={ - "pod_count": 2, + "pod_count": 1, "pod_image": old_version, }, ) @@ -5156,6 +5159,51 @@ def check_restart(): with Finally("I clean up"): delete_test_namespace() +@TestScenario +@Name("test_054. 
Test that 'suspend' mode delays any changes until unsuspended") +@Requirements(RQ_SRS_026_ClickHouseOperator_Managing_VersionUpgrades("1.0")) +def test_054(self): + create_shell_namespace_clickhouse_template() + chi = yaml_manifest.get_name(util.get_full_path("manifests/chi/test-006-ch-upgrade-1.yaml")) + + old_version = "clickhouse/clickhouse-server:23.8" + new_version = "clickhouse/clickhouse-server:24.3" + with Then(f"Start CHI with version {old_version}"): + kubectl.create_and_check( + manifest="manifests/chi/test-006-ch-upgrade-1.yaml", + check={ + "pod_count": 1, + "pod_image": old_version, + "do_not_delete": 1, + }, + ) + + with Then("Add suspend attribute to CHI"): + cmd = f'patch chi {chi} --type=\'json\' --patch=\'[{{"op":"add","path":"/spec/suspend","value":"yes"}}]\'' + kubectl.launch(cmd) + + with Then(f"Update podTemplate to {new_version} and confirm that pod image is NOT updated"): + kubectl.create_and_check( + manifest="manifests/chi/test-006-ch-upgrade-2.yaml", + check={ + "pod_count": 1, + "pod_image": old_version, + "do_not_delete": 1, + }, + ) + + with Then("Remove suspend attribute from CHI"): + cmd = f'patch chi {chi} --type=\'json\' --patch=\'[{{"op":"remove","path":"/spec/suspend"}}]\'' + kubectl.launch(cmd) + + kubectl.wait_chi_status(chi, "InProgress") + kubectl.wait_chi_status(chi, "Completed") + + with Then(f"Confirm that pod image is updated to {new_version}"): + kubectl.check_pod_image(chi, new_version) + + with Finally("I clean up"): + delete_test_namespace() @TestModule @Name("e2e.test_operator") From 5b37e9653ad78d225bd013bb5ef1c0ce2db777ec Mon Sep 17 00:00:00 2001 From: alz Date: Tue, 17 Dec 2024 14:49:09 +0300 Subject: [PATCH 047/161] Extract normalizedCompleted from a chi-storage configmap --- tests/e2e/kubectl.py | 3 +++ tests/e2e/test_operator.py | 11 ++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/e2e/kubectl.py b/tests/e2e/kubectl.py index 9fc4adc4c..ca00f2af7 100644 --- a/tests/e2e/kubectl.py +++ b/tests/e2e/kubectl.py @@ -203,6 +203,9 @@ def get(kind, name, label="", ns=None, ok_to_fail=False, shell=None): out = launch(f"get {kind} {name} {label} -o json", ns=ns, ok_to_fail=ok_to_fail, shell=shell) return json.loads(out.strip()) +def get_chi_normalizedCompleted(chi, ns=None, shell=None): + chi_storage = get("configmap", f"chi-storage-{chi}", ns=ns) + return json.loads(chi_storage["data"]["status-normalizedCompleted"]) def create_ns(ns): if ns is None: diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index cb6241401..8180e89f5 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -858,9 +858,9 @@ def test_011_2(self): ) with Then("Default user plain password should be removed"): - chi = kubectl.get("chi", "test-011-secured-default") - # assert "default/password" in chi["status"]["normalizedCompleted"]["spec"]["configuration"]["users"] - # assert chi["status"]["normalizedCompleted"]["spec"]["configuration"]["users"]["default/password"] == "" + normalizedCompleted = kubectl.get_chi_normalizedCompleted("test-011-secured-default") + assert "default/password" in normalizedCompleted["spec"]["configuration"]["users"] + assert normalizedCompleted["spec"]["configuration"]["users"]["default/password"] == "" cfm = kubectl.get("configmap", "chi-test-011-secured-default-common-usersd") assert '' in cfm["data"]["chop-generated-users.xml"] @@ -2621,8 +2621,9 @@ def test_023(self): assert kubectl.get_field("chi", chi, ".status.usedTemplates[1].name") == "extension-annotations" # assert 
kubectl.get_field("chi", chi, ".status.usedTemplates[2].name") == "" - # with Then("Annotation from a template should be populated"): - # assert kubectl.get_field("chi", chi, ".status.normalizedCompleted.metadata.annotations.test") == "test" + with Then("Annotation from a template should be populated"): + normalizedCompleted = kubectl.get_chi_normalizedCompleted(chi) + assert normalizedCompleted["metadata"]["annotations"]["test"] == "test" with Then("Pod annotation should populated from template"): assert kubectl.get_field("pod", f"chi-{chi}-single-0-0-0", ".metadata.annotations.test") == "test" with Then("Environment variable from a template should be populated"): From 9b72b02946faa99bafc0a16de4c884c741d10255 Mon Sep 17 00:00:00 2001 From: alz Date: Tue, 17 Dec 2024 22:18:54 +0300 Subject: [PATCH 048/161] simplify test_030 --- tests/e2e/test_operator.py | 38 ++++++++------------------------------ 1 file changed, 8 insertions(+), 30 deletions(-) diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index 8180e89f5..78b299194 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -3208,52 +3208,30 @@ def test_030(self): }, ) - with When("I create new shells"): - shell_1 = get_shell() - shell_2 = get_shell() - - trigger_event = threading.Event() - Check("Check that cluster definition does not change during restart", test=check_remote_servers, parallel=True)( - chi=chi, - cluster="default", - shards=2, - trigger_event=trigger_event, - shell=shell_1, - ) - with When("Delete CRD"): - kubectl.launch("delete crd clickhouseinstallations.clickhouse.altinity.com", shell=shell_2) + kubectl.launch("delete crd clickhouseinstallations.clickhouse.altinity.com") with Then("CHI should be deleted"): - kubectl.wait_object("chi", chi, count=0, shell=shell_2) + kubectl.wait_object("chi", chi, count=0) with And("CHI objects SHOULD NOT be deleted"): - assert kubectl.count_objects(label=f"-l clickhouse.altinity.com/chi={chi}", shell=shell_2) == object_counts + assert kubectl.count_objects(label=f"-l clickhouse.altinity.com/chi={chi}") == object_counts - pod = kubectl.get_pod_names(chi, shell=shell_2)[0] - start_time = kubectl.get_field("pod", pod, ".status.startTime", shell=shell_2) + pod = kubectl.get_pod_names(chi)[0] + start_time = kubectl.get_field("pod", pod, ".status.startTime") with When("Reinstall the operator"): - util.install_operator_if_not_exist(reinstall=True, shell=shell_2) + util.install_operator_if_not_exist(reinstall=True) with Then("Re-create CHI"): kubectl.create_and_check( manifest, check={ "object_counts": object_counts, "do_not_delete": 1, - }, - shell = shell_2 + } ) with Then("Pods should not be restarted"): - new_start_time = kubectl.get_field("pod", pod, ".status.startTime", shell=shell_2) + new_start_time = kubectl.get_field("pod", pod, ".status.startTime") assert start_time == new_start_time - # Terminate check - trigger_event.set() - join() - - with Then("I recreate shell"): - shell = get_shell() - self.context.shell = shell - with Finally("I clean up"): delete_test_namespace() From 6e02d53cfe9f01d0eba7d0eb43eb48bc5a73d373 Mon Sep 17 00:00:00 2001 From: alz Date: Wed, 18 Dec 2024 10:42:22 +0300 Subject: [PATCH 049/161] Stabilize tests --- .../chi/{test-030.yaml => test-099.yaml} | 2 +- .../manifests/chopconf/test-034-chopconf.yaml | 4 +++ tests/e2e/test_operator.py | 31 ++++++++++++------- 3 files changed, 25 insertions(+), 12 deletions(-) rename tests/e2e/manifests/chi/{test-030.yaml => test-099.yaml} (93%) diff --git 
a/tests/e2e/manifests/chi/test-030.yaml b/tests/e2e/manifests/chi/test-099.yaml similarity index 93% rename from tests/e2e/manifests/chi/test-030.yaml rename to tests/e2e/manifests/chi/test-099.yaml index 44f6f1a7b..3d7ed49bf 100644 --- a/tests/e2e/manifests/chi/test-030.yaml +++ b/tests/e2e/manifests/chi/test-099.yaml @@ -1,7 +1,7 @@ apiVersion: "clickhouse.altinity.com/v1" kind: "ClickHouseInstallation" metadata: - name: test-030 + name: test-099 spec: useTemplates: - name: clickhouse-version diff --git a/tests/e2e/manifests/chopconf/test-034-chopconf.yaml b/tests/e2e/manifests/chopconf/test-034-chopconf.yaml index c61daca72..3d4d24746 100644 --- a/tests/e2e/manifests/chopconf/test-034-chopconf.yaml +++ b/tests/e2e/manifests/chopconf/test-034-chopconf.yaml @@ -8,3 +8,7 @@ spec: # Port where to connect to ClickHouse instances to scheme: https port: 8443 + reconcile: + statefulSet: + update: + timeout: 90 diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index 78b299194..94a1cdd3f 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -3191,12 +3191,15 @@ def test_029(self): @TestScenario -@Name("test_030. Test CRD deletion") +@Name("test_099. Test CRD deletion. Should be executed at the end") @Tags("NO_PARALLEL") -def test_030(self): +def test_099(self): create_shell_namespace_clickhouse_template() - manifest = "manifests/chi/test-030.yaml" + # delete existing chis if any in order to avoid side effects + cleanup_chis(self) + + manifest = "manifests/chi/test-099.yaml" chi = yaml_manifest.get_name(util.get_full_path(manifest)) object_counts = {"statefulset": 2, "pod": 2, "service": 3} @@ -3537,9 +3540,12 @@ def test_034(self): with And("Re-create operator pod in order to restart metrics exporter to update the configuration [1]"): util.restart_operator() + kubectl.wait_chi_status(chi, "Completed") + out = kubectl.launch("get pods -l app=clickhouse-operator", ns=current().context.operator_namespace).splitlines()[1] operator_pod = re.split(r"[\t\r\n\s]+", out)[0] + with Then("check for `chi_clickhouse_metric_fetch_errors` is not zero"): check_metrics_monitoring( operator_namespace=operator_namespace, @@ -5184,6 +5190,16 @@ def test_054(self): with Finally("I clean up"): delete_test_namespace() +def cleanup_chis(self): + with Given("Cleanup CHIs"): + ns = kubectl.get("ns", name="", ns="--all-namespaces") + if "items" in ns: + for n in ns["items"]: + ns_name = n["metadata"]["name"] + if ns_name.startswith("test") and ns_name != self.context.test_namespace: + with Then(f"Delete ns {ns_name}"): + util.delete_namespace(namespace = ns_name, delete_chi=True) + @TestModule @Name("e2e.test_operator") @Requirements(RQ_SRS_026_ClickHouseOperator_CustomResource_APIVersion("1.0"), @@ -5196,14 +5212,7 @@ def test(self): shell = get_shell() self.context.shell = shell - with Given("Cleanup CHIs"): - ns = kubectl.get("ns", name="", ns="--all-namespaces") - if "items" in ns: - for n in ns["items"]: - ns_name = n["metadata"]["name"] - if ns_name.startswith("test"): - with Then(f"Delete ns {ns_name}"): - util.delete_namespace(namespace = ns_name, delete_chi=True) + cleanup_chis(self) # Placeholder for selective test running # run_tests = [test_008, test_009] From 34129249a232adbe5ddddc2c6ae32d83c5054bdf Mon Sep 17 00:00:00 2001 From: alz Date: Wed, 18 Dec 2024 13:17:14 +0300 Subject: [PATCH 050/161] Stabilize operator restart tests --- tests/e2e/test_operator.py | 102 +++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 50 deletions(-) diff --git 
a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py
index 94a1cdd3f..94679d24f 100644
--- a/tests/e2e/test_operator.py
+++ b/tests/e2e/test_operator.py
@@ -271,7 +271,6 @@ def test_operator_restart(self, manifest, service, version=None):
 shell_1 = get_shell()
 shell_2 = get_shell()
 shell_3 = get_shell()
- shell_4 = get_shell()
 Check("run query until receive stop event", test=run_select_query, parallel=True)(
 host=service,
@@ -307,15 +306,14 @@ def test_operator_restart(self, manifest, service, version=None):
 "pod": 2,
 "service": 3,
 },
- pod=f"chi-{chi}-{cluster}-0-0-0",
- shell=shell_4
+ pod=f"chi-{chi}-{cluster}-0-0-0"
 )
 trigger_event.set()
 join()
- with Then("I recreate shell"):
- shell = get_shell()
- self.context.shell = shell
+ # with Then("I recreate shell"):
+ # shell = get_shell()
+ # self.context.shell = shell
 with Then("Local tables should have exactly the same number of rows"):
 cnt0 = clickhouse.query(chi, "select count() from test_local", host=f'chi-{chi}-{cluster}-0-0-0')
@@ -356,16 +354,18 @@ def check_remote_servers(self, chi, shards, trigger_event, shell=None, cluster="
 cluster = chi
 ok_runs = 0
- while not trigger_event.is_set():
- chi_shards = get_shards_from_remote_servers(chi, cluster, shell=shell)
+ with Then(f"Check remote_servers contains {shards} shards until receiving a stop event"):
+ while not trigger_event.is_set():
+ chi_shards = get_shards_from_remote_servers(chi, cluster, shell=shell)
- if chi_shards != shards:
- with Then(f"Number of shards in {cluster} cluster should be {shards} got {chi_shards} instead"):
- assert chi_shards == shards
- ok_runs += 1
- time.sleep(1)
+ if chi_shards != shards:
+ with Then(f"Number of shards in {cluster} cluster should be {shards} got {chi_shards} instead"):
+ assert chi_shards == shards
+
+ ok_runs += 1
+ time.sleep(0.5)
- with By(f"remote_servers were always correct {ok_runs} times"):
+ with Then(f"remote_servers were always correct {ok_runs} times"):
 assert ok_runs > 0
@@ -464,9 +464,9 @@ def test_008_3(self):
 trigger_event.set()
 join()
- with Then("I recreate shell"):
- shell = get_shell()
- self.context.shell = shell
+ # with Then("I recreate shell"):
+ # shell = get_shell()
+ # self.context.shell = shell
 with Finally("I clean up"):
 delete_test_namespace()
@@ -540,9 +540,9 @@ def test_operator_upgrade(self, manifest, service, version_from, version_to=None
 trigger_event.set()
 join()
- with Then("I recreate shell"):
- shell = get_shell()
- self.context.shell = shell
+ # with Then("I recreate shell"):
+ # shell = get_shell()
+ # self.context.shell = shell
 with Then("Check that table is here"):
 tables = clickhouse.query(chi, "SHOW TABLES")
@@ -3307,19 +3307,19 @@ def run_select_query(self, host, user, password, query, res1, res2, trigger_even
 client_pod = "clickhouse-client"
- try:
-
+ with When(f"Create {client_pod} pod"):
 kubectl.launch(f'run {client_pod} --image={current().context.clickhouse_version} -- /bin/sh -c "sleep 3600"', shell=shell)
 kubectl.wait_pod_status(client_pod, "Running", shell=shell)
- ok = 0
- partial = 0
- errors = 0
- run = 0
- partial_runs = []
- error_runs = []
+ ok = 0
+ partial = 0
+ errors = 0
+ run = 0
+ partial_runs = []
+ error_runs = []
- cmd = f'exec -n {self.context.test_namespace} {client_pod} -- clickhouse-client --user={user} --password={password} -h {host} -q "{query}"'
+ cmd = f'exec -n {self.context.test_namespace} {client_pod} -- clickhouse-client --user={user} --password={password} -h {host} -q "{query}"'
+ with Then("Run select queries until receiving a stop event"):
 while not 
trigger_event.is_set(): run += 1 # Adjust time to glog's format @@ -3338,7 +3338,8 @@ def run_select_query(self, host, user, password, query, res1, res2, trigger_even print("*** RUN_QUERY ERROR ***") print(cnt_test) time.sleep(0.5) - with By( + + with Then( f"{run} queries have been executed, of which: " + f"{ok} queries have been executed with no errors, " + f"{partial} queries returned incomplete results, " + @@ -3348,13 +3349,11 @@ def run_select_query(self, host, user, password, query, res1, res2, trigger_even ): assert errors == 0, error() if partial > 0: - print( - f"*** WARNING ***: cluster was partially unavailable, {partial} queries returned incomplete results" - ) - finally: - with Finally("I clean up"): - with By("deleting pod"): - kubectl.launch(f"delete pod {client_pod}", shell=shell) + print(f"*** WARNING ***: cluster was partially unavailable, {partial} queries returned incomplete results") + + # with Finally("I clean up"): # can not cleanup, since threads may join already and shell may be unavailable + # with By("deleting pod"): + # kubectl.launch(f"delete pod {client_pod}", shell=shell) @TestCheck @@ -3362,14 +3361,17 @@ def run_insert_query(self, host, user, password, query, trigger_event, shell=Non """Run an insert query in parallel until the stop signal is received.""" client_pod = "clickhouse-insert" - try: + + with Then(f"Create {client_pod} pod"): kubectl.launch(f'run {client_pod} --image={current().context.clickhouse_version} -- /bin/sh -c "sleep 3600"', shell=shell) kubectl.wait_pod_status(client_pod, "Running", shell=shell) - ok = 0 - errors = 0 + ok = 0 + errors = 0 + + cmd = f'exec -n {self.context.test_namespace} {client_pod} -- clickhouse-client --user={user} --password={password} -h {host} -q "{query}"' - cmd = f'exec -n {self.context.test_namespace} {client_pod} -- clickhouse-client --user={user} --password={password} -h {host} -q "{query}"' + with Then("Run insert queries until receiving a stop event"): while not trigger_event.is_set(): res = kubectl.launch(cmd, ok_to_fail=True, shell=shell) if res == "": @@ -3377,13 +3379,13 @@ def run_insert_query(self, host, user, password, query, trigger_event, shell=Non else: note(f"WTF res={res}") errors += 1 - with By(f"{ok} inserts have been executed with no errors, {errors} inserts have failed"): - assert errors == 0, error() - finally: - with Finally("I clean up"): - with By("deleting pod"): - kubectl.launch(f"delete pod {client_pod}", shell=shell) + time.sleep(0.5) + with Then(f"{ok} inserts have been executed with no errors, {errors} inserts have failed"): + assert errors == 0, error() + # with Finally("I clean up"): # can not cleanup, since threads may join already and shell may be unavailable + # with By("deleting pod"): + # kubectl.launch(f"delete pod {client_pod}", shell=shell) @TestScenario @Name("test_032. 
Test rolling update logic") @@ -3484,9 +3486,9 @@ def test_032(self): trigger_event.set() join() - with Then("I recreate shell"): - shell = get_shell() - self.context.shell = shell + # with Then("I recreate shell"): + # shell = get_shell() + # self.context.shell = shell with Finally("I clean up"): delete_test_namespace() From da0f2468587ebc14c418565849089a0816af649d Mon Sep 17 00:00:00 2001 From: Eugene Klimov Date: Wed, 18 Dec 2024 18:03:50 +0400 Subject: [PATCH 051/161] update prometheus operator to 0.79.0 (#1599) --- deploy/prometheus/create-prometheus.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/deploy/prometheus/create-prometheus.sh b/deploy/prometheus/create-prometheus.sh index 274db7999..55db2c715 100755 --- a/deploy/prometheus/create-prometheus.sh +++ b/deploy/prometheus/create-prometheus.sh @@ -1,12 +1,10 @@ #!/bin/bash echo "External value for \$PROMETHEUS_NAMESPACE=$PROMETHEUS_NAMESPACE" -echo "External value for \$OPERATOR_NAMESPACE=$OPERATOR_NAMESPACE" echo "External value for \$VALIDATE_YAML=$VALIDATE_YAML" export PROMETHEUS_NAMESPACE="${PROMETHEUS_NAMESPACE:-prometheus}" -export OPERATOR_NAMESPACE="${OPERATOR_NAMESPACE:-kube-system}" -export PROMETHEUS_OPERATOR_BRANCH="${PROMETHEUS_OPERATOR_BRANCH:-v0.68.0}" +export PROMETHEUS_OPERATOR_BRANCH="${PROMETHEUS_OPERATOR_BRANCH:-v0.79.0}" export ALERT_MANAGER_EXTERNAL_URL="${ALERT_MANAGER_EXTERNAL_URL:-http://localhost:9093}" # Possible values for "validate yaml" are values from --validate=XXX kubectl option. They are true/false ATM export VALIDATE_YAML="${VALIDATE_YAML:-true}" @@ -15,7 +13,6 @@ CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" echo "OPTIONS" echo "Setup Prometheus into \$PROMETHEUS_NAMESPACE=${PROMETHEUS_NAMESPACE} namespace" -echo "Expecting operator in \$OPERATOR_NAMESPACE=${OPERATOR_NAMESPACE} namespace" echo "Validate .yaml file \$VALIDATE_YAML=${VALIDATE_YAML}" echo "" echo "!!! IMPORTANT !!!" 
From 55698520b14b44021874585e824ea07779440376 Mon Sep 17 00:00:00 2001 From: alz Date: Wed, 18 Dec 2024 18:38:05 +0300 Subject: [PATCH 052/161] Faster and more reliable test_047 --- .../chi/test-047-zero-weighted-shard.yaml | 4 -- tests/e2e/test_operator.py | 55 +++++++++---------- 2 files changed, 25 insertions(+), 34 deletions(-) diff --git a/tests/e2e/manifests/chi/test-047-zero-weighted-shard.yaml b/tests/e2e/manifests/chi/test-047-zero-weighted-shard.yaml index 4d5bbdbf4..9679d0f55 100644 --- a/tests/e2e/manifests/chi/test-047-zero-weighted-shard.yaml +++ b/tests/e2e/manifests/chi/test-047-zero-weighted-shard.yaml @@ -6,10 +6,6 @@ spec: useTemplates: - name: clickhouse-version configuration: - zookeeper: - nodes: - - host: zookeeper - port: 2181 clusters: - name: default layout: diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index 94679d24f..3aa244d0a 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -1290,7 +1290,7 @@ def get_shards_from_remote_servers(chi, cluster, shell=None): def wait_for_cluster(chi, cluster, num_shards, num_replicas=0, pwd="", force_wait=False): with Given(f"Cluster {cluster} is properly configured"): if current().context.operator_version >= "0.24" and force_wait is False: - print(f"operator {current().context.operator_version} does not require extra wait, skipping check") + note(f"operator {current().context.operator_version} does not require extra wait, skipping check") else: with By(f"remote_servers have {num_shards} shards"): assert num_shards == get_shards_from_remote_servers(chi, cluster) @@ -4583,7 +4583,6 @@ def test_047(self): check that data not inserted into zero-weighted shard in distributed table.""" create_shell_namespace_clickhouse_template() - util.require_keeper(keeper_type=self.context.keeper_type) manifest = f"manifests/chi/test-047-zero-weighted-shard.yaml" chi = yaml_manifest.get_name(util.get_full_path(manifest)) cluster = "default" @@ -4595,24 +4594,32 @@ def test_047(self): "do_not_delete": 1, }, ) + wait_for_cluster(chi, cluster, 2, force_wait = True) + + with Then("I check weight is specified in /etc/clickhouse-server/config.d/chop-generated-remote_servers.xml file"): + r = kubectl.launch( + f"""exec chi-{chi}-default-0-0-0 -- bash -c 'cat """ + f"""/etc/clickhouse-server/config.d/chop-generated-remote_servers.xml | head -n 7 | tail -n 1'""" + ) + assert "0" in r + r = kubectl.launch( + f"""exec chi-{chi}-default-0-0-0 -- bash -c 'cat """ + f"""/etc/clickhouse-server/config.d/chop-generated-remote_servers.xml | head -n 16 | tail -n 1'""" + ) + assert "1" in r + + numbers = 100 with When("I create distributed table"): - create_table = """ - CREATE TABLE test_local_047 ON CLUSTER 'default' (a UInt32) - Engine = ReplicatedMergeTree('/clickhouse/{installation}/tables/{shard}/{database}/{table}', '{replica}') - PARTITION BY tuple() - ORDER BY a - """.replace( - "\r", "" - ).replace( - "\n", "" - ) - clickhouse.query(chi, create_table) - clickhouse.query( - chi, - "CREATE TABLE test_distr_047 ON CLUSTER 'default' AS test_local_047 " - "Engine = Distributed('default', default, test_local_047, a%2)", - ) + for shard in (0,1): + clickhouse.query( + chi, + "CREATE TABLE test_local_047 (a UInt32) Engine = MergeTree PARTITION BY tuple() ORDER BY a", + host = f"chi-{chi}-{cluster}-{shard}-0-0") + clickhouse.query( + chi, + "CREATE TABLE test_distr_047 AS test_local_047 Engine = Distributed('default', default, test_local_047, a%2)", + host = f"chi-{chi}-{cluster}-{shard}-0-0") with And("I insert data in 
the distributed table"): clickhouse.query(chi, f"INSERT INTO test_distr_047 select * from numbers({numbers})") @@ -4627,18 +4634,6 @@ def test_047(self): out = clickhouse.query(chi, "SELECT count(*) from test_distr_047", host=f"chi-{chi}-{cluster}-1-0-0") assert out == f"{numbers}" - with Then("I check weight is specified in /etc/clickhouse-server/config.d/chop-generated-remote_servers.xml file"): - r = kubectl.launch( - f"""exec chi-{chi}-default-0-0-0 -- bash -c 'cat """ - f"""/etc/clickhouse-server/config.d/chop-generated-remote_servers.xml | head -n 7 | tail -n 1'""" - ) - assert "0" in r - r = kubectl.launch( - f"""exec chi-{chi}-default-0-0-0 -- bash -c 'cat """ - f"""/etc/clickhouse-server/config.d/chop-generated-remote_servers.xml | head -n 16 | tail -n 1'""" - ) - assert "1" in r - with Finally("I clean up"): delete_test_namespace() From 1f0f94197f2b5e2f554283824dcbd4cd95224302 Mon Sep 17 00:00:00 2001 From: alz Date: Thu, 19 Dec 2024 09:00:06 +0300 Subject: [PATCH 053/161] Remove redundant check at the beginning of host reconcile. --- pkg/controller/chi/worker-chi-reconciler.go | 28 +++++++++++---------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go index 17cc84447..558fc5877 100644 --- a/pkg/controller/chi/worker-chi-reconciler.go +++ b/pkg/controller/chi/worker-chi-reconciler.go @@ -717,19 +717,21 @@ func (w *worker) reconcileHost(ctx context.Context, host *api.Host) error { // reconcileHostPrepare reconciles specified ClickHouse host func (w *worker) reconcileHostPrepare(ctx context.Context, host *api.Host) error { // Check whether ClickHouse is running and accessible and what version is available - if version, err := w.getHostClickHouseVersion(ctx, host, versionOptions{skipNew: true, skipStoppedAncestor: true}); err == nil { - w.a.V(1). - WithEvent(host.GetCR(), a.EventActionReconcile, a.EventReasonReconcileStarted). - WithAction(host.GetCR()). - M(host).F(). - Info("Reconcile Host start. Host: %s ClickHouse version running: %s", host.GetName(), version) - } else { - w.a.V(1). - WithEvent(host.GetCR(), a.EventActionReconcile, a.EventReasonReconcileStarted). - WithAction(host.GetCR()). - M(host).F(). - Warning("Reconcile Host start. Host: %s Failed to get ClickHouse version: %s", host.GetName(), version) - } + + // alz 18.12.2024: Host may be down or not accessible, so no reason to wait +// if version, err := w.getHostClickHouseVersion(ctx, host, versionOptions{skipNew: true, skipStoppedAncestor: true}); err == nil { +// w.a.V(1). +// WithEvent(host.GetCR(), a.EventActionReconcile, a.EventReasonReconcileStarted). +// WithAction(host.GetCR()). +// M(host).F(). +// Info("Reconcile Host start. Host: %s ClickHouse version running: %s", host.GetName(), version) +// } else { +// w.a.V(1). +// WithEvent(host.GetCR(), a.EventActionReconcile, a.EventReasonReconcileStarted). +// WithAction(host.GetCR()). +// M(host).F(). +// Warning("Reconcile Host start. 
Host: %s Failed to get ClickHouse version: %s", host.GetName(), version) +// } if w.excludeHost(ctx, host) { // Need to wait to complete queries only in case host is excluded from the cluster From 0af524c7e11d54b4ab7d3eac1eeeb68178c0dc1a Mon Sep 17 00:00:00 2001 From: alz Date: Thu, 19 Dec 2024 10:48:15 +0300 Subject: [PATCH 054/161] Fix test_014_0 to work with CHK --- tests/e2e/test_operator.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index 3aa244d0a..8f2cb888f 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -1597,9 +1597,18 @@ def check_schema_propagation(replicas): ) assert out == "1" - with When("Restart keeper pod"): - with Then("Delete Zookeeper pod"): - kubectl.launch(f"delete pod {self.context.keeper_type}-0") + with When("Restart (Zoo)Keeper pod"): + if self.context.keeper_type == "zookeeper": + keeper_pod = "zookeeper-0" + elif self.context.keeper_type == "clickhouse-keeper": + keeper_pod = "clickhouse-keeper-0" + elif self.context.keeper_type == "chk": + keeper_pod = "chk-clickhouse-keeper-test-0-0-0" + else: + error(f"Unsupported Keeper type {self.context.keeper_type}") + + with Then("Delete (Zoo)Keeper pod"): + kubectl.launch(f"delete pod {keeper_pod}") time.sleep(1) with Then(f"try insert into the table while {self.context.keeper_type} offline table should be in readonly mode"): @@ -1607,8 +1616,8 @@ def check_schema_propagation(replicas): assert "Table is in readonly mode" in out with Then(f"Wait for {self.context.keeper_type} pod to come back"): - kubectl.wait_object("pod", f"{self.context.keeper_type}-0") - kubectl.wait_pod_status(f"{self.context.keeper_type}-0", "Running") + kubectl.wait_object("pod", keeper_pod) + kubectl.wait_pod_status(keeper_pod, "Running") with Then(f"Wait for ClickHouse to reconnect to {self.context.keeper_type} and switch from read-write mode"): util.wait_clickhouse_no_readonly_replicas(chi) @@ -1663,7 +1672,7 @@ def check_schema_propagation(replicas): "do_not_delete": 1, }, ) - with Then("Tables are deleted in ZooKeeper"): + with Then("Tables are deleted in (Zoo)Keeper"): out = clickhouse.query_with_error( chi_name, f"SELECT count() FROM system.zookeeper WHERE path ='/clickhouse/{cluster}/tables/0/default'", From ad9483f8f42fe13488511b0d0c1bd1b4b202c62c Mon Sep 17 00:00:00 2001 From: alz Date: Thu, 19 Dec 2024 11:40:03 +0300 Subject: [PATCH 055/161] Fix .status.hostsWithTablesCreated population where actions are disabled --- pkg/apis/clickhouse-keeper.altinity.com/v1/type_status.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/apis/clickhouse-keeper.altinity.com/v1/type_status.go b/pkg/apis/clickhouse-keeper.altinity.com/v1/type_status.go index 82a4b479b..e0c7b6a56 100644 --- a/pkg/apis/clickhouse-keeper.altinity.com/v1/type_status.go +++ b/pkg/apis/clickhouse-keeper.altinity.com/v1/type_status.go @@ -369,6 +369,7 @@ func prepareOptions(opts types.CopyStatusOptions) types.CopyStatusOptions { opts.Copy.HostsCompletedCount = true opts.Copy.HostsDeletedCount = true opts.Copy.HostsDeleteCount = true + opts.Copy.HostsWithTablesCreated = true opts.Copy.Pods = true opts.Copy.PodIPs = true opts.Copy.FQDNs = true @@ -404,6 +405,7 @@ func prepareOptions(opts types.CopyStatusOptions) types.CopyStatusOptions { opts.Copy.HostsCompletedCount = true opts.Copy.HostsDeletedCount = true opts.Copy.HostsDeleteCount = true + opts.Copy.HostsWithTablesCreated = true opts.Copy.Pods = true opts.Copy.PodIPs = true opts.Copy.FQDNs = 
true From f6c1ddfc423f7a5bdbdcfdd05b1dd342e92eda62 Mon Sep 17 00:00:00 2001 From: alz Date: Thu, 19 Dec 2024 14:34:39 +0300 Subject: [PATCH 056/161] Fix .status.hostsWithTablesCreated in CHI. Remove from CHK --- pkg/apis/clickhouse-keeper.altinity.com/v1/type_status.go | 4 ---- pkg/apis/clickhouse.altinity.com/v1/type_status.go | 2 ++ 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pkg/apis/clickhouse-keeper.altinity.com/v1/type_status.go b/pkg/apis/clickhouse-keeper.altinity.com/v1/type_status.go index e0c7b6a56..807f78237 100644 --- a/pkg/apis/clickhouse-keeper.altinity.com/v1/type_status.go +++ b/pkg/apis/clickhouse-keeper.altinity.com/v1/type_status.go @@ -330,14 +330,12 @@ func prepareOptions(opts types.CopyStatusOptions) types.CopyStatusOptions { opts.Copy.TaskIDsCompleted = true opts.Copy.Actions = true opts.Copy.Errors = true - opts.Copy.HostsWithTablesCreated = true opts.Copy.UsedTemplates = true } if opts.FieldGroupActions { opts.Copy.Action = true opts.Merge.Actions = true - opts.Copy.HostsWithTablesCreated = true opts.Copy.UsedTemplates = true } @@ -369,7 +367,6 @@ func prepareOptions(opts types.CopyStatusOptions) types.CopyStatusOptions { opts.Copy.HostsCompletedCount = true opts.Copy.HostsDeletedCount = true opts.Copy.HostsDeleteCount = true - opts.Copy.HostsWithTablesCreated = true opts.Copy.Pods = true opts.Copy.PodIPs = true opts.Copy.FQDNs = true @@ -405,7 +402,6 @@ func prepareOptions(opts types.CopyStatusOptions) types.CopyStatusOptions { opts.Copy.HostsCompletedCount = true opts.Copy.HostsDeletedCount = true opts.Copy.HostsDeleteCount = true - opts.Copy.HostsWithTablesCreated = true opts.Copy.Pods = true opts.Copy.PodIPs = true opts.Copy.FQDNs = true diff --git a/pkg/apis/clickhouse.altinity.com/v1/type_status.go b/pkg/apis/clickhouse.altinity.com/v1/type_status.go index 4ac023f90..a627d70d9 100644 --- a/pkg/apis/clickhouse.altinity.com/v1/type_status.go +++ b/pkg/apis/clickhouse.altinity.com/v1/type_status.go @@ -368,6 +368,7 @@ func prepareOptions(opts types.CopyStatusOptions) types.CopyStatusOptions { opts.Copy.HostsCompletedCount = true opts.Copy.HostsDeletedCount = true opts.Copy.HostsDeleteCount = true + opts.Copy.HostsWithTablesCreated = true opts.Copy.Pods = true opts.Copy.PodIPs = true opts.Copy.FQDNs = true @@ -403,6 +404,7 @@ func prepareOptions(opts types.CopyStatusOptions) types.CopyStatusOptions { opts.Copy.HostsCompletedCount = true opts.Copy.HostsDeletedCount = true opts.Copy.HostsDeleteCount = true + opts.Copy.HostsWithTablesCreated = true opts.Copy.Pods = true opts.Copy.PodIPs = true opts.Copy.FQDNs = true From 96d0c3d63d8a78de4f23c2c7c637692a8d93f76b Mon Sep 17 00:00:00 2001 From: alz Date: Thu, 19 Dec 2024 17:00:29 +0300 Subject: [PATCH 057/161] Remove unused fields from wide status output --- ...l-template-01-section-crd-01-chi-chit.yaml | 16 ++------- .../clickhouse-operator-install-ansible.yaml | 32 ++++-------------- ...house-operator-install-bundle-v1beta1.yaml | 33 ++++--------------- .../clickhouse-operator-install-bundle.yaml | 32 ++++-------------- ...use-operator-install-template-v1beta1.yaml | 33 ++++--------------- .../clickhouse-operator-install-template.yaml | 32 ++++-------------- .../clickhouse-operator-install-tf.yaml | 32 ++++-------------- deploy/operator/parts/crd.yaml | 32 ++++-------------- 8 files changed, 45 insertions(+), 197 deletions(-) diff --git a/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-01-section-crd-01-chi-chit.yaml 
b/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-01-section-crd-01-chi-chit.yaml index 901b89b54..650ad03d7 100644 --- a/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-01-section-crd-01-chi-chit.yaml +++ b/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-01-section-crd-01-chi-chit.yaml @@ -53,11 +53,10 @@ spec: type: string description: Resource status jsonPath: .status.status - - name: hosts-unchanged + - name: hosts-completed type: integer - description: Unchanged hosts count - priority: 1 # show in wide view - jsonPath: .status.hostsUnchanged + description: Completed hosts count + jsonPath: .status.hostsCompleted - name: hosts-updated type: integer description: Updated hosts count @@ -68,20 +67,11 @@ spec: description: Added hosts count priority: 1 # show in wide view jsonPath: .status.hostsAdded - - name: hosts-completed - type: integer - description: Completed hosts count - jsonPath: .status.hostsCompleted - name: hosts-deleted type: integer description: Hosts deleted count priority: 1 # show in wide view jsonPath: .status.hostsDeleted - - name: hosts-delete - type: integer - description: Hosts to be deleted count - priority: 1 # show in wide view - jsonPath: .status.hostsDelete - name: endpoint type: string description: Client access endpoint diff --git a/deploy/operator/clickhouse-operator-install-ansible.yaml b/deploy/operator/clickhouse-operator-install-ansible.yaml index a6ac26f2d..e54812c71 100644 --- a/deploy/operator/clickhouse-operator-install-ansible.yaml +++ b/deploy/operator/clickhouse-operator-install-ansible.yaml @@ -60,11 +60,10 @@ spec: type: string description: Resource status jsonPath: .status.status - - name: hosts-unchanged + - name: hosts-completed type: integer - description: Unchanged hosts count - priority: 1 # show in wide view - jsonPath: .status.hostsUnchanged + description: Completed hosts count + jsonPath: .status.hostsCompleted - name: hosts-updated type: integer description: Updated hosts count @@ -75,20 +74,11 @@ spec: description: Added hosts count priority: 1 # show in wide view jsonPath: .status.hostsAdded - - name: hosts-completed - type: integer - description: Completed hosts count - jsonPath: .status.hostsCompleted - name: hosts-deleted type: integer description: Hosts deleted count priority: 1 # show in wide view jsonPath: .status.hostsDeleted - - name: hosts-delete - type: integer - description: Hosts to be deleted count - priority: 1 # show in wide view - jsonPath: .status.hostsDelete - name: endpoint type: string description: Client access endpoint @@ -1352,11 +1342,10 @@ spec: type: string description: Resource status jsonPath: .status.status - - name: hosts-unchanged + - name: hosts-completed type: integer - description: Unchanged hosts count - priority: 1 # show in wide view - jsonPath: .status.hostsUnchanged + description: Completed hosts count + jsonPath: .status.hostsCompleted - name: hosts-updated type: integer description: Updated hosts count @@ -1367,20 +1356,11 @@ spec: description: Added hosts count priority: 1 # show in wide view jsonPath: .status.hostsAdded - - name: hosts-completed - type: integer - description: Completed hosts count - jsonPath: .status.hostsCompleted - name: hosts-deleted type: integer description: Hosts deleted count priority: 1 # show in wide view jsonPath: .status.hostsDeleted - - name: hosts-delete - type: integer - description: Hosts to be deleted count - priority: 1 # show in wide view - jsonPath: 
.status.hostsDelete - name: endpoint type: string description: Client access endpoint diff --git a/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml b/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml index 5e15ece95..cc5969463 100644 --- a/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml +++ b/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml @@ -50,11 +50,10 @@ spec: type: string description: Resource status JSONPath: .status.status - - name: hosts-unchanged + - name: hosts-completed type: integer - description: Unchanged hosts count - priority: 1 # show in wide view - JSONPath: .status.hostsUnchanged + description: Completed hosts count + JSONPath: .status.hostsCompleted - name: hosts-updated type: integer description: Updated hosts count @@ -65,20 +64,11 @@ spec: description: Added hosts count priority: 1 # show in wide view JSONPath: .status.hostsAdded - - name: hosts-completed - type: integer - description: Completed hosts count - JSONPath: .status.hostsCompleted - name: hosts-deleted type: integer description: Hosts deleted count priority: 1 # show in wide view JSONPath: .status.hostsDeleted - - name: hosts-delete - type: integer - description: Hosts to be deleted count - priority: 1 # show in wide view - JSONPath: .status.hostsDelete - name: endpoint type: string description: Client access endpoint @@ -1332,11 +1322,10 @@ spec: type: string description: Resource status JSONPath: .status.status - - name: hosts-unchanged + - name: hosts-completed type: integer - description: Unchanged hosts count - priority: 1 # show in wide view - JSONPath: .status.hostsUnchanged + description: Completed hosts count + JSONPath: .status.hostsCompleted - name: hosts-updated type: integer description: Updated hosts count @@ -1347,20 +1336,11 @@ spec: description: Added hosts count priority: 1 # show in wide view JSONPath: .status.hostsAdded - - name: hosts-completed - type: integer - description: Completed hosts count - JSONPath: .status.hostsCompleted - name: hosts-deleted type: integer description: Hosts deleted count priority: 1 # show in wide view JSONPath: .status.hostsDeleted - - name: hosts-delete - type: integer - description: Hosts to be deleted count - priority: 1 # show in wide view - JSONPath: .status.hostsDelete - name: endpoint type: string description: Client access endpoint @@ -3869,7 +3849,6 @@ metadata: namespace: kube-system labels: clickhouse.altinity.com/chop: 0.24.3 - # Template Parameters: # # NAMESPACE=kube-system diff --git a/deploy/operator/clickhouse-operator-install-bundle.yaml b/deploy/operator/clickhouse-operator-install-bundle.yaml index 3720cfc30..581f9a2d0 100644 --- a/deploy/operator/clickhouse-operator-install-bundle.yaml +++ b/deploy/operator/clickhouse-operator-install-bundle.yaml @@ -53,11 +53,10 @@ spec: type: string description: Resource status jsonPath: .status.status - - name: hosts-unchanged + - name: hosts-completed type: integer - description: Unchanged hosts count - priority: 1 # show in wide view - jsonPath: .status.hostsUnchanged + description: Completed hosts count + jsonPath: .status.hostsCompleted - name: hosts-updated type: integer description: Updated hosts count @@ -68,20 +67,11 @@ spec: description: Added hosts count priority: 1 # show in wide view jsonPath: .status.hostsAdded - - name: hosts-completed - type: integer - description: Completed hosts count - jsonPath: .status.hostsCompleted - name: hosts-deleted type: integer description: Hosts deleted count priority: 1 # show in wide view 
jsonPath: .status.hostsDeleted - - name: hosts-delete - type: integer - description: Hosts to be deleted count - priority: 1 # show in wide view - jsonPath: .status.hostsDelete - name: endpoint type: string description: Client access endpoint @@ -1345,11 +1335,10 @@ spec: type: string description: Resource status jsonPath: .status.status - - name: hosts-unchanged + - name: hosts-completed type: integer - description: Unchanged hosts count - priority: 1 # show in wide view - jsonPath: .status.hostsUnchanged + description: Completed hosts count + jsonPath: .status.hostsCompleted - name: hosts-updated type: integer description: Updated hosts count @@ -1360,20 +1349,11 @@ spec: description: Added hosts count priority: 1 # show in wide view jsonPath: .status.hostsAdded - - name: hosts-completed - type: integer - description: Completed hosts count - jsonPath: .status.hostsCompleted - name: hosts-deleted type: integer description: Hosts deleted count priority: 1 # show in wide view jsonPath: .status.hostsDeleted - - name: hosts-delete - type: integer - description: Hosts to be deleted count - priority: 1 # show in wide view - jsonPath: .status.hostsDelete - name: endpoint type: string description: Client access endpoint diff --git a/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml b/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml index 8abc631a1..13c739b77 100644 --- a/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml +++ b/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml @@ -50,11 +50,10 @@ spec: type: string description: Resource status JSONPath: .status.status - - name: hosts-unchanged + - name: hosts-completed type: integer - description: Unchanged hosts count - priority: 1 # show in wide view - JSONPath: .status.hostsUnchanged + description: Completed hosts count + JSONPath: .status.hostsCompleted - name: hosts-updated type: integer description: Updated hosts count @@ -65,20 +64,11 @@ spec: description: Added hosts count priority: 1 # show in wide view JSONPath: .status.hostsAdded - - name: hosts-completed - type: integer - description: Completed hosts count - JSONPath: .status.hostsCompleted - name: hosts-deleted type: integer description: Hosts deleted count priority: 1 # show in wide view JSONPath: .status.hostsDeleted - - name: hosts-delete - type: integer - description: Hosts to be deleted count - priority: 1 # show in wide view - JSONPath: .status.hostsDelete - name: endpoint type: string description: Client access endpoint @@ -1332,11 +1322,10 @@ spec: type: string description: Resource status JSONPath: .status.status - - name: hosts-unchanged + - name: hosts-completed type: integer - description: Unchanged hosts count - priority: 1 # show in wide view - JSONPath: .status.hostsUnchanged + description: Completed hosts count + JSONPath: .status.hostsCompleted - name: hosts-updated type: integer description: Updated hosts count @@ -1347,20 +1336,11 @@ spec: description: Added hosts count priority: 1 # show in wide view JSONPath: .status.hostsAdded - - name: hosts-completed - type: integer - description: Completed hosts count - JSONPath: .status.hostsCompleted - name: hosts-deleted type: integer description: Hosts deleted count priority: 1 # show in wide view JSONPath: .status.hostsDeleted - - name: hosts-delete - type: integer - description: Hosts to be deleted count - priority: 1 # show in wide view - JSONPath: .status.hostsDelete - name: endpoint type: string description: Client access endpoint @@ -3869,7 +3849,6 @@ 
metadata: namespace: ${OPERATOR_NAMESPACE} labels: clickhouse.altinity.com/chop: 0.24.3 - # Template Parameters: # # NAMESPACE=${OPERATOR_NAMESPACE} diff --git a/deploy/operator/clickhouse-operator-install-template.yaml b/deploy/operator/clickhouse-operator-install-template.yaml index 3dc0196de..b5cab6eb3 100644 --- a/deploy/operator/clickhouse-operator-install-template.yaml +++ b/deploy/operator/clickhouse-operator-install-template.yaml @@ -53,11 +53,10 @@ spec: type: string description: Resource status jsonPath: .status.status - - name: hosts-unchanged + - name: hosts-completed type: integer - description: Unchanged hosts count - priority: 1 # show in wide view - jsonPath: .status.hostsUnchanged + description: Completed hosts count + jsonPath: .status.hostsCompleted - name: hosts-updated type: integer description: Updated hosts count @@ -68,20 +67,11 @@ spec: description: Added hosts count priority: 1 # show in wide view jsonPath: .status.hostsAdded - - name: hosts-completed - type: integer - description: Completed hosts count - jsonPath: .status.hostsCompleted - name: hosts-deleted type: integer description: Hosts deleted count priority: 1 # show in wide view jsonPath: .status.hostsDeleted - - name: hosts-delete - type: integer - description: Hosts to be deleted count - priority: 1 # show in wide view - jsonPath: .status.hostsDelete - name: endpoint type: string description: Client access endpoint @@ -1345,11 +1335,10 @@ spec: type: string description: Resource status jsonPath: .status.status - - name: hosts-unchanged + - name: hosts-completed type: integer - description: Unchanged hosts count - priority: 1 # show in wide view - jsonPath: .status.hostsUnchanged + description: Completed hosts count + jsonPath: .status.hostsCompleted - name: hosts-updated type: integer description: Updated hosts count @@ -1360,20 +1349,11 @@ spec: description: Added hosts count priority: 1 # show in wide view jsonPath: .status.hostsAdded - - name: hosts-completed - type: integer - description: Completed hosts count - jsonPath: .status.hostsCompleted - name: hosts-deleted type: integer description: Hosts deleted count priority: 1 # show in wide view jsonPath: .status.hostsDeleted - - name: hosts-delete - type: integer - description: Hosts to be deleted count - priority: 1 # show in wide view - jsonPath: .status.hostsDelete - name: endpoint type: string description: Client access endpoint diff --git a/deploy/operator/clickhouse-operator-install-tf.yaml b/deploy/operator/clickhouse-operator-install-tf.yaml index a5588e0b0..5c164e9b6 100644 --- a/deploy/operator/clickhouse-operator-install-tf.yaml +++ b/deploy/operator/clickhouse-operator-install-tf.yaml @@ -60,11 +60,10 @@ spec: type: string description: Resource status jsonPath: .status.status - - name: hosts-unchanged + - name: hosts-completed type: integer - description: Unchanged hosts count - priority: 1 # show in wide view - jsonPath: .status.hostsUnchanged + description: Completed hosts count + jsonPath: .status.hostsCompleted - name: hosts-updated type: integer description: Updated hosts count @@ -75,20 +74,11 @@ spec: description: Added hosts count priority: 1 # show in wide view jsonPath: .status.hostsAdded - - name: hosts-completed - type: integer - description: Completed hosts count - jsonPath: .status.hostsCompleted - name: hosts-deleted type: integer description: Hosts deleted count priority: 1 # show in wide view jsonPath: .status.hostsDeleted - - name: hosts-delete - type: integer - description: Hosts to be deleted count - priority: 1 # show 
in wide view - jsonPath: .status.hostsDelete - name: endpoint type: string description: Client access endpoint @@ -1352,11 +1342,10 @@ spec: type: string description: Resource status jsonPath: .status.status - - name: hosts-unchanged + - name: hosts-completed type: integer - description: Unchanged hosts count - priority: 1 # show in wide view - jsonPath: .status.hostsUnchanged + description: Completed hosts count + jsonPath: .status.hostsCompleted - name: hosts-updated type: integer description: Updated hosts count @@ -1367,20 +1356,11 @@ spec: description: Added hosts count priority: 1 # show in wide view jsonPath: .status.hostsAdded - - name: hosts-completed - type: integer - description: Completed hosts count - jsonPath: .status.hostsCompleted - name: hosts-deleted type: integer description: Hosts deleted count priority: 1 # show in wide view jsonPath: .status.hostsDeleted - - name: hosts-delete - type: integer - description: Hosts to be deleted count - priority: 1 # show in wide view - jsonPath: .status.hostsDelete - name: endpoint type: string description: Client access endpoint diff --git a/deploy/operator/parts/crd.yaml b/deploy/operator/parts/crd.yaml index b980729c7..4f38c81ba 100644 --- a/deploy/operator/parts/crd.yaml +++ b/deploy/operator/parts/crd.yaml @@ -53,11 +53,10 @@ spec: type: string description: Resource status jsonPath: .status.status - - name: hosts-unchanged + - name: hosts-completed type: integer - description: Unchanged hosts count - priority: 1 # show in wide view - jsonPath: .status.hostsUnchanged + description: Completed hosts count + jsonPath: .status.hostsCompleted - name: hosts-updated type: integer description: Updated hosts count @@ -68,20 +67,11 @@ spec: description: Added hosts count priority: 1 # show in wide view jsonPath: .status.hostsAdded - - name: hosts-completed - type: integer - description: Completed hosts count - jsonPath: .status.hostsCompleted - name: hosts-deleted type: integer description: Hosts deleted count priority: 1 # show in wide view jsonPath: .status.hostsDeleted - - name: hosts-delete - type: integer - description: Hosts to be deleted count - priority: 1 # show in wide view - jsonPath: .status.hostsDelete - name: endpoint type: string description: Client access endpoint @@ -1995,11 +1985,10 @@ spec: type: string description: Resource status jsonPath: .status.status - - name: hosts-unchanged + - name: hosts-completed type: integer - description: Unchanged hosts count - priority: 1 # show in wide view - jsonPath: .status.hostsUnchanged + description: Completed hosts count + jsonPath: .status.hostsCompleted - name: hosts-updated type: integer description: Updated hosts count @@ -2010,20 +1999,11 @@ spec: description: Added hosts count priority: 1 # show in wide view jsonPath: .status.hostsAdded - - name: hosts-completed - type: integer - description: Completed hosts count - jsonPath: .status.hostsCompleted - name: hosts-deleted type: integer description: Hosts deleted count priority: 1 # show in wide view jsonPath: .status.hostsDeleted - - name: hosts-delete - type: integer - description: Hosts to be deleted count - priority: 1 # show in wide view - jsonPath: .status.hostsDelete - name: endpoint type: string description: Client access endpoint From 21eea15c9822de03c9ed2203420df0f402dfc39d Mon Sep 17 00:00:00 2001 From: alz Date: Thu, 19 Dec 2024 18:07:51 +0300 Subject: [PATCH 058/161] Suppress github specific error --- tests/e2e/test_operator.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git 
a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index 8f2cb888f..7c174729a 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -217,10 +217,10 @@ def check_operator_restart(chi, wait_objects, pod, shell=None): with When("Restart operator"): util.restart_operator(shell=shell) time.sleep(15) - print(f"wait objects") + kubectl.wait_objects(chi, wait_objects, shell=shell) - print(f"wait chi status") kubectl.wait_chi_status(chi, "Completed", shell=shell) + new_start_time = kubectl.get_field("pod", pod, ".status.startTime", shell=shell) with Then("ClickHouse pods should not be restarted during operator's restart"): @@ -3336,11 +3336,13 @@ def run_select_query(self, host, user, password, query, res1, res2, trigger_even cnt_test = kubectl.launch(cmd, ok_to_fail=True, shell=shell) if cnt_test == res1: ok += 1 - if cnt_test == res2: + elif cnt_test == res2: partial += 1 partial_runs.append(run) partial_runs.append(now) - if cnt_test != res1 and cnt_test != res2: + elif "Unknown stream id" in cnt_test: + print("Ignore unknown stream id error: " + cnt_test) + elif cnt_test != res1 and cnt_test != res2: errors += 1 error_runs.append(run) error_runs.append(now) From d5b29953cf0d4c03515ef23a0b0e12b833c654c1 Mon Sep 17 00:00:00 2001 From: alz Date: Fri, 20 Dec 2024 14:26:27 +0300 Subject: [PATCH 059/161] Stabilizing tests --- tests/e2e/manifests/chi/test-005-acm.yaml | 2 +- .../manifests/chit/tpl-clickhouse-stable.yaml | 3 +- tests/e2e/test_operator.py | 35 +++++++++++-------- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/tests/e2e/manifests/chi/test-005-acm.yaml b/tests/e2e/manifests/chi/test-005-acm.yaml index 2d925213c..2be9bf44f 100644 --- a/tests/e2e/manifests/chi/test-005-acm.yaml +++ b/tests/e2e/manifests/chi/test-005-acm.yaml @@ -13,7 +13,7 @@ spec: fsGroup: 101 containers: - name: clickhouse-pod - image: clickhouse/clickhouse-server:24.8.5.115 + image: clickhouse/clickhouse-server:24.8.8.17 ports: - name: http containerPort: 8123 diff --git a/tests/e2e/manifests/chit/tpl-clickhouse-stable.yaml b/tests/e2e/manifests/chit/tpl-clickhouse-stable.yaml index ba5ee4d0d..3829b2639 100644 --- a/tests/e2e/manifests/chit/tpl-clickhouse-stable.yaml +++ b/tests/e2e/manifests/chit/tpl-clickhouse-stable.yaml @@ -13,7 +13,6 @@ spec: spec: containers: - name: clickhouse-pod - # image: clickhouse/clickhouse-server:23.8.8.21.altinitystable - image: clickhouse/clickhouse-server:24.8.5.115 + image: clickhouse/clickhouse-server:24.8.8.17 imagePullPolicy: IfNotPresent diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index 7c174729a..6b5329432 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -250,7 +250,7 @@ def test_operator_restart(self, manifest, service, version=None): }, ) - wait_for_cluster(chi, cluster, 2) + wait_for_cluster(chi, cluster, 2, 1) with Then("Create tables"): for h in [f"chi-{chi}-{cluster}-0-0-0", f"chi-{chi}-{cluster}-1-0-0"]: @@ -309,6 +309,7 @@ def test_operator_restart(self, manifest, service, version=None): pod=f"chi-{chi}-{cluster}-0-0-0" ) trigger_event.set() + time.sleep(5) # let threads to finish join() # with Then("I recreate shell"): @@ -462,6 +463,7 @@ def test_008_3(self): shell=shell_2 ) trigger_event.set() + time.sleep(5) # let threads to finish join() # with Then("I recreate shell"): @@ -538,6 +540,7 @@ def test_operator_upgrade(self, manifest, service, version_from, version_to=None kubectl.wait_objects(chi, {"statefulset": 2, "pod": 2, "service": 3}) trigger_event.set() + time.sleep(5) 
# let threads to finish join() # with Then("I recreate shell"): @@ -1312,7 +1315,7 @@ def wait_for_cluster(chi, cluster, num_shards, num_replicas=0, pwd="", force_wai assert shards == str(num_shards) if num_replicas > 0: - with By(f"ClickHouse recognizes {num_replicas} replicas shard in the cluster {cluster}"): + with By(f"ClickHouse recognizes {num_replicas} replicas in the cluster {cluster}"): for shard in range(num_shards): for replica in range(num_replicas): replicas = "" @@ -3314,10 +3317,10 @@ def test_031(self): def run_select_query(self, host, user, password, query, res1, res2, trigger_event, shell=None): """Run a select query in parallel until the stop signal is received.""" - client_pod = "clickhouse-client" + client_pod = "clickhouse-select-client" - with When("fCreate {client_pod} pod"): - kubectl.launch(f'run {client_pod} --image={current().context.clickhouse_version} -- /bin/sh -c "sleep 3600"', shell=shell) + with When(f"Create {client_pod} pod"): + kubectl.launch(f'run {client_pod} --image={current().context.clickhouse_version} -- /bin/sh -c "while true; do sleep 5; done;"', shell=shell) kubectl.wait_pod_status(client_pod, "Running", shell=shell) ok = 0 @@ -3327,19 +3330,24 @@ def run_select_query(self, host, user, password, query, res1, res2, trigger_even partial_runs = [] error_runs = [] - cmd = f'exec -n {self.context.test_namespace} {client_pod} -- clickhouse-client --user={user} --password={password} -h {host} -q "{query}"' + def cmd(query): + return f'exec -n {self.context.test_namespace} {client_pod} -- clickhouse-client --user={user} --password={password} -h {host} -q "{query}"' + with Then("Run select queries until receiving a stop event"): while not trigger_event.is_set(): run += 1 # Adjust time to glog's format now = datetime.utcnow().strftime("%H:%M:%S.%f") - cnt_test = kubectl.launch(cmd, ok_to_fail=True, shell=shell) + cnt_test = kubectl.launch(cmd(query), ok_to_fail=True, shell=shell) if cnt_test == res1: ok += 1 elif cnt_test == res2: partial += 1 partial_runs.append(run) partial_runs.append(now) + res = kubectl.launch(cmd("select now(), host_name, host_address from system.clusters where cluster = getMacro('cluster')"), ok_to_fail=True, shell=shell) + print("Partial results returned. 
Here is the current on cluster queries") + print(res) elif "Unknown stream id" in cnt_test: print("Ignore unknown stream id error: " + cnt_test) elif cnt_test != res1 and cnt_test != res2: @@ -3348,7 +3356,7 @@ def run_select_query(self, host, user, password, query, res1, res2, trigger_even error_runs.append(now) print("*** RUN_QUERY ERROR ***") print(cnt_test) - time.sleep(0.5) + time.sleep(1) with Then( f"{run} queries have been executed, of which: " + @@ -3359,8 +3367,6 @@ def run_select_query(self, host, user, password, query, res1, res2, trigger_even f"error runs: {error_runs}" ): assert errors == 0, error() - if partial > 0: - print(f"*** WARNING ***: cluster was partially unavailable, {partial} queries returned incomplete results") # with Finally("I clean up"): # can not cleanup, since threads may join already and shell may be unavailable # with By("deleting pod"): @@ -3371,10 +3377,10 @@ def run_select_query(self, host, user, password, query, res1, res2, trigger_even def run_insert_query(self, host, user, password, query, trigger_event, shell=None): """Run an insert query in parallel until the stop signal is received.""" - client_pod = "clickhouse-insert" + client_pod = "clickhouse-insert-client" - with Then(f"Create {client_pod} pod"): - kubectl.launch(f'run {client_pod} --image={current().context.clickhouse_version} -- /bin/sh -c "sleep 3600"', shell=shell) + with When(f"Create {client_pod} pod"): + kubectl.launch(f'run {client_pod} --image={current().context.clickhouse_version} -- /bin/sh -c "while true; do sleep 5; done;"', shell=shell) kubectl.wait_pod_status(client_pod, "Running", shell=shell) ok = 0 @@ -3390,7 +3396,7 @@ def run_insert_query(self, host, user, password, query, trigger_event, shell=Non else: note(f"WTF res={res}") errors += 1 - time.sleep(0.5) + time.sleep(1) with Then(f"{ok} inserts have been executed with no errors, {errors} inserts have failed"): assert errors == 0, error() @@ -3495,6 +3501,7 @@ def test_032(self): ) trigger_event.set() + time.sleep(5) # let threads to finish join() # with Then("I recreate shell"): From 7cf2cd31b569176b1a46130a748171c9b44f701c Mon Sep 17 00:00:00 2001 From: alz Date: Fri, 20 Dec 2024 19:39:59 +0300 Subject: [PATCH 060/161] Check the host is in real cluster instead of all-sharded --- pkg/model/chi/schemer/schemer.go | 4 ++-- pkg/model/chi/schemer/sql.go | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pkg/model/chi/schemer/schemer.go b/pkg/model/chi/schemer/schemer.go index 7f133ccbb..99dc00a7e 100644 --- a/pkg/model/chi/schemer/schemer.go +++ b/pkg/model/chi/schemer/schemer.go @@ -130,9 +130,9 @@ func (s *ClusterSchemer) HostDropTables(ctx context.Context, host *api.Host) err // IsHostInCluster checks whether host is a member of at least one ClickHouse cluster func (s *ClusterSchemer) IsHostInCluster(ctx context.Context, host *api.Host) bool { inside := false - SQLs := []string{s.sqlHostInCluster()} + sql := s.sqlHostInCluster(host.Runtime.Address.ClusterName) opts := clickhouse.NewQueryOptions().SetSilent(true) - err := s.ExecHost(ctx, host, SQLs, opts) + err := s.ExecHost(ctx, host, []string{sql}, opts) if err == nil { log.V(1).M(host).F().Info("The host %s is inside the cluster", host.GetName()) inside = true diff --git a/pkg/model/chi/schemer/sql.go b/pkg/model/chi/schemer/sql.go index 4387ff872..a91b8c7f5 100644 --- a/pkg/model/chi/schemer/sql.go +++ b/pkg/model/chi/schemer/sql.go @@ -22,7 +22,6 @@ import ( api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" 
"github.com/altinity/clickhouse-operator/pkg/interfaces" - "github.com/altinity/clickhouse-operator/pkg/model/chi/config" ) const ignoredDBs = `'system', 'information_schema', 'INFORMATION_SCHEMA'` @@ -246,8 +245,8 @@ func (s *ClusterSchemer) sqlVersion() string { return `SELECT version()` } -func (s *ClusterSchemer) sqlHostInCluster() string { - // TODO: Change to select count() query to avoid exception in operator and ClickHouse logs +func (s *ClusterSchemer) sqlHostInCluster(cluster string) string { + // TODO: Change throwIf to select count() query to avoid exception in operator and ClickHouse logs return heredoc.Docf(` SELECT throwIf(count()=0) @@ -256,6 +255,6 @@ func (s *ClusterSchemer) sqlHostInCluster() string { WHERE cluster='%s' AND is_local `, - config.AllShardsOneReplicaClusterName, + cluster, ) } From 8c731eb0a4928ac7fa3943e0c56ace55ee66a452 Mon Sep 17 00:00:00 2001 From: alz Date: Fri, 20 Dec 2024 19:58:17 +0300 Subject: [PATCH 061/161] Switch from throwing an exception to simple count() query --- pkg/model/chi/schemer/schemer.go | 5 ++--- pkg/model/chi/schemer/sql.go | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pkg/model/chi/schemer/schemer.go b/pkg/model/chi/schemer/schemer.go index 99dc00a7e..34e6350f0 100644 --- a/pkg/model/chi/schemer/schemer.go +++ b/pkg/model/chi/schemer/schemer.go @@ -131,9 +131,8 @@ func (s *ClusterSchemer) HostDropTables(ctx context.Context, host *api.Host) err func (s *ClusterSchemer) IsHostInCluster(ctx context.Context, host *api.Host) bool { inside := false sql := s.sqlHostInCluster(host.Runtime.Address.ClusterName) - opts := clickhouse.NewQueryOptions().SetSilent(true) - err := s.ExecHost(ctx, host, []string{sql}, opts) - if err == nil { + res, err := s.QueryHostString(ctx, host, sql) + if err == nil && res == "1" { log.V(1).M(host).F().Info("The host %s is inside the cluster", host.GetName()) inside = true } else { diff --git a/pkg/model/chi/schemer/sql.go b/pkg/model/chi/schemer/sql.go index a91b8c7f5..281448358 100644 --- a/pkg/model/chi/schemer/sql.go +++ b/pkg/model/chi/schemer/sql.go @@ -246,10 +246,9 @@ func (s *ClusterSchemer) sqlVersion() string { } func (s *ClusterSchemer) sqlHostInCluster(cluster string) string { - // TODO: Change throwIf to select count() query to avoid exception in operator and ClickHouse logs return heredoc.Docf(` SELECT - throwIf(count()=0) + count() FROM system.clusters WHERE From 58c1e1ddafcb718cde64711de5f8992017192a5b Mon Sep 17 00:00:00 2001 From: alz Date: Sat, 21 Dec 2024 12:16:18 +0300 Subject: [PATCH 062/161] Better debug information for wait_for_cluster status --- tests/e2e/test_operator.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index 6b5329432..aca5e5da7 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -1329,7 +1329,7 @@ def wait_for_cluster(chi, cluster, num_shards, num_replicas=0, pwd="", force_wai ) if replicas == str(num_replicas): break - with Then(f"Not ready. {replicas}/{num_replicas} replicas Wait for " + str(i * 5) + " seconds"): + with Then(f"Not ready. {replicas}/{num_replicas} replicas. 
Wait for " + str(i * 5) + " seconds"): time.sleep(i * 5) assert replicas == str(num_replicas) num_hosts = num_shards * num_replicas @@ -1338,18 +1338,20 @@ def wait_for_cluster(chi, cluster, num_shards, num_replicas=0, pwd="", force_wai for replica in range(num_replicas): hosts = "" for i in range(1, 10): + host=f"chi-{chi}-{cluster}-{shard}-{replica}" hosts = clickhouse.query( chi, - f"select count() from system.clusters where cluster ='{cluster}'", - host=f"chi-{chi}-{cluster}-{shard}-{replica}", + f"select count(), groupArray(host_name) from system.clusters where cluster ='{cluster}'", + host=host, pwd=pwd, with_error=True, ) - if hosts == str(num_hosts): + if hosts.startswith(str(num_hosts)): break - with Then(f"Not ready. {hosts}/{num_hosts} hosts Wait for " + str(i * 5) + " seconds"): + with Then(f"{host} is not ready. Wait for " + str(i * 5) + " seconds"): + print("Found: " + hosts) time.sleep(i * 5) - assert hosts == str(num_hosts) + assert hosts.startswith(str(num_hosts)) @TestScenario @@ -3393,6 +3395,8 @@ def run_insert_query(self, host, user, password, query, trigger_event, shell=Non res = kubectl.launch(cmd, ok_to_fail=True, shell=shell) if res == "": ok += 1 + elif "Unknown stream id" in res: + print("Ignore unknown stream id error: " + res) else: note(f"WTF res={res}") errors += 1 From 294cdb61676020b15b8c281f257e01f85a42273c Mon Sep 17 00:00:00 2001 From: alz Date: Tue, 24 Dec 2024 12:24:04 +0300 Subject: [PATCH 063/161] Fixed error message --- pkg/controller/chi/worker-migrator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/chi/worker-migrator.go b/pkg/controller/chi/worker-migrator.go index 4d3a5c014..cac24725a 100644 --- a/pkg/controller/chi/worker-migrator.go +++ b/pkg/controller/chi/worker-migrator.go @@ -110,7 +110,7 @@ func (w *worker) migrateTables(ctx context.Context, host *api.Host, opts ...*mig WithEvent(host.GetCR(), a.EventActionCreate, a.EventReasonCreateFailed). WithAction(host.GetCR()). M(host).F(). 
- Error("ERROR add tables added successfully on shard/host:%d/%d cluster:%s err:%v", + Error("ERROR add tables failed on shard/host:%d/%d cluster:%s err:%v", host.Runtime.Address.ShardIndex, host.Runtime.Address.ReplicaIndex, host.Runtime.Address.ClusterName, err) } return err From 7c17d44fc4b8a378f2960426cc9e31636f6eee41 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Mon, 6 Jan 2025 11:38:41 +0300 Subject: [PATCH 064/161] env: manifests --- deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml | 1 + .../operator/clickhouse-operator-install-template-v1beta1.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml b/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml index cc5969463..a5c79b58a 100644 --- a/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml +++ b/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml @@ -3849,6 +3849,7 @@ metadata: namespace: kube-system labels: clickhouse.altinity.com/chop: 0.24.3 + # Template Parameters: # # NAMESPACE=kube-system diff --git a/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml b/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml index 13c739b77..8905d2068 100644 --- a/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml +++ b/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml @@ -3849,6 +3849,7 @@ metadata: namespace: ${OPERATOR_NAMESPACE} labels: clickhouse.altinity.com/chop: 0.24.3 + # Template Parameters: # # NAMESPACE=${OPERATOR_NAMESPACE} From 15ec87742d11c709c12d9ee25a8ef8ced50e4619 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Mon, 6 Jan 2025 11:38:53 +0300 Subject: [PATCH 065/161] env: helm charts --- ...useinstallations.clickhouse.altinity.com.yaml | 16 +++------------- ...llationtemplates.clickhouse.altinity.com.yaml | 16 +++------------- 2 files changed, 6 insertions(+), 26 deletions(-) diff --git a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallations.clickhouse.altinity.com.yaml b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallations.clickhouse.altinity.com.yaml index 2227b3732..4cc84176a 100644 --- a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallations.clickhouse.altinity.com.yaml +++ b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallations.clickhouse.altinity.com.yaml @@ -53,11 +53,10 @@ spec: type: string description: Resource status jsonPath: .status.status - - name: hosts-unchanged + - name: hosts-completed type: integer - description: Unchanged hosts count - priority: 1 # show in wide view - jsonPath: .status.hostsUnchanged + description: Completed hosts count + jsonPath: .status.hostsCompleted - name: hosts-updated type: integer description: Updated hosts count @@ -68,20 +67,11 @@ spec: description: Added hosts count priority: 1 # show in wide view jsonPath: .status.hostsAdded - - name: hosts-completed - type: integer - description: Completed hosts count - jsonPath: .status.hostsCompleted - name: hosts-deleted type: integer description: Hosts deleted count priority: 1 # show in wide view jsonPath: .status.hostsDeleted - - name: hosts-delete - type: integer - description: Hosts to be deleted count - priority: 1 # show in wide view - jsonPath: .status.hostsDelete - name: endpoint type: string description: Client access endpoint diff --git 
a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallationtemplates.clickhouse.altinity.com.yaml b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallationtemplates.clickhouse.altinity.com.yaml
index a8fcaf93d..8e98dcc4a 100644
--- a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallationtemplates.clickhouse.altinity.com.yaml
+++ b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouseinstallationtemplates.clickhouse.altinity.com.yaml
@@ -53,11 +53,10 @@ spec:
         type: string
         description: Resource status
         jsonPath: .status.status
-      - name: hosts-unchanged
+      - name: hosts-completed
         type: integer
-        description: Unchanged hosts count
-        priority: 1 # show in wide view
-        jsonPath: .status.hostsUnchanged
+        description: Completed hosts count
+        jsonPath: .status.hostsCompleted
       - name: hosts-updated
         type: integer
         description: Updated hosts count
@@ -68,20 +67,11 @@ spec:
         description: Added hosts count
         priority: 1 # show in wide view
         jsonPath: .status.hostsAdded
-      - name: hosts-completed
-        type: integer
-        description: Completed hosts count
-        jsonPath: .status.hostsCompleted
       - name: hosts-deleted
         type: integer
         description: Hosts deleted count
         priority: 1 # show in wide view
         jsonPath: .status.hostsDeleted
-      - name: hosts-delete
-        type: integer
-        description: Hosts to be deleted count
-        priority: 1 # show in wide view
-        jsonPath: .status.hostsDelete
       - name: endpoint
         type: string
         description: Client access endpoint

From 2824a46d2db86501c182919f8e8820a6ec72e458 Mon Sep 17 00:00:00 2001
From: Eugene Klimov
Date: Mon, 6 Jan 2025 15:16:32 +0500
Subject: [PATCH 066/161] add tiered s3 storage example (#1607)

* add tiered s3 storage example
---
 .../03-persistent-volume-08-tiered-s3.yaml    | 86 +++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 docs/chi-examples/03-persistent-volume-08-tiered-s3.yaml

diff --git a/docs/chi-examples/03-persistent-volume-08-tiered-s3.yaml b/docs/chi-examples/03-persistent-volume-08-tiered-s3.yaml
new file mode 100644
index 000000000..8d440db4a
--- /dev/null
+++ b/docs/chi-examples/03-persistent-volume-08-tiered-s3.yaml
@@ -0,0 +1,86 @@
+---
+apiVersion: "clickhouse.altinity.com/v1"
+kind: "ClickHouseInstallation"
+metadata:
+  name: "s3-tiered"
+spec:
+  configuration:
+    clusters:
+      - name: "cluster"
+        templates:
+          podTemplate: pod-template
+        layout:
+          shardsCount: 1
+          replicasCount: 1
+    files:
+      config.d/storage_configuration.xml: |
+        <clickhouse>
+          <storage_configuration>
+            <disks>
+              <s3_disk>
+                <type>s3</type>
+                <endpoint>https://sample-bucket.s3.amazonaws.com/s3_disk/{replica}</endpoint>
+                <access_key_id>your_access_key_id</access_key_id>
+                <secret_access_key>your_secret_access_key</secret_access_key>
+                <region>us-east-2</region>
+                <metadata_path>/var/lib/clickhouse/disks/s3_disk/</metadata_path>
+              </s3_disk>
+              <s3_cache>
+                <type>cache</type>
+                <disk>s3_disk</disk>
+                <path>/var/lib/clickhouse/disks/s3_cache/</path>
+                <max_size>10Gi</max_size>
+              </s3_cache>
+            </disks>
+            <policies>
+              <s3_main>
+                <volumes>
+                  <main>
+                    <disk>s3_disk</disk>
+                  </main>
+                </volumes>
+              </s3_main>
+              <s3_cached>
+                <volumes>
+                  <main>
+                    <disk>s3_cache</disk>
+                  </main>
+                </volumes>
+              </s3_cached>
+              <tiered>
+                <volumes>
+                  <default>
+                    <disk>default</disk>
+                  </default>
+                  <s3>
+                    <disk>s3_cache</disk>
+                  </s3>
+                </volumes>
+              </tiered>
+            </policies>
+          </storage_configuration>
+        </clickhouse>
+  templates:
+    podTemplates:
+      - name: pod-template
+        spec:
+          containers:
+            - name: clickhouse
+              image: clickhouse/clickhouse-server:latest
+              imagePullPolicy: IfNotPresent
+              volumeMounts:
+                - name: data-storage-vc-template-1
+                  mountPath: /var/lib/clickhouse
+              command:
+                - clickhouse-server
+                - --config-file=/etc/clickhouse-server/config.xml
+
+    volumeClaimTemplates:
+      - name: data-storage-vc-template-1
+        spec:
+          # storageClassName: standard
+          accessModes:
+            - ReadWriteOnce
+          resources:
+            requests:
+              storage: 10Gi
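
Nothing in the CHI itself pins a table to one of these policies; a table opts in through its `storage_policy` setting. Below is a minimal smoke-test sketch using the github.com/ClickHouse/clickhouse-go/v2 driver — the service address and table name are illustrative assumptions, and `tiered` refers to the policy defined in the XML of this example:

```go
package main

import (
	"context"
	"log"

	"github.com/ClickHouse/clickhouse-go/v2"
)

func main() {
	ctx := context.Background()
	// Assumed address: the operator-created service for the "s3-tiered"
	// CHI, reachable from inside the cluster.
	conn, err := clickhouse.Open(&clickhouse.Options{
		Addr: []string{"clickhouse-s3-tiered:9000"},
	})
	if err != nil {
		log.Fatal(err)
	}
	// Pin a table to the tiered policy: fresh parts stay on the default
	// disk and can be moved to the S3-backed cached volume later, e.g.
	// via a TTL ... TO VOLUME clause or ALTER TABLE ... MOVE PARTITION.
	if err := conn.Exec(ctx, `
		CREATE TABLE events (day Date, value UInt64)
		ENGINE = MergeTree
		ORDER BY day
		SETTINGS storage_policy = 'tiered'`); err != nil {
		log.Fatal(err)
	}
}
```
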
From 601457c1734abe4925f904188a4297bc55b3bf61 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Mon, 6 Jan 2025 14:45:03 +0300
Subject: [PATCH 067/161] test: xfail flaky chk test

---
 tests/regression.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/regression.py b/tests/regression.py
index 65358db68..6627f423c 100755
--- a/tests/regression.py
+++ b/tests/regression.py
@@ -7,6 +7,7 @@
 xfails = {
     # test_operator.py
+    "/regression/e2e.test_operator/test_049*": [(Fail, "Keeper is flaky")],
     "/regression/e2e.test_operator/test_052*": [(Fail, "Keeper scale-up/scale-down is flaky")],
     # test_clickhouse.py
     "/regression/e2e.test_clickhouse/test_ch_001*": [(Fail, "Insert Quorum test need to refactoring")],

From b72ae0f36ad086c4f1d1f31397d5be7dd0680f36 Mon Sep 17 00:00:00 2001
From: Eugene Klimov
Date: Mon, 6 Jan 2025 19:16:42 +0500
Subject: [PATCH 068/161] change zookeeper readinessProbe to decrease downtime
 during version upgrade and quorum achievement (#1608)

---
 .../advanced/05-stateful-set-persistent-volume.yaml | 3 ++-
 .../advanced/05-stateful-set-volume-emptyDir.yaml   | 3 ++-
 ...okeeper-1-node-1GB-for-tests-only-scaleout-pvc-secure.yaml | 2 +-
 .../zookeeper-1-node-1GB-for-tests-only-scaleout-pvc.yaml | 2 +-
 .../zookeeper-1-node-1GB-for-tests-only.yaml | 4 ++--
 .../zookeeper-1-node-for-test-probes.yaml | 3 ++-
 .../quick-start-persistent-volume/zookeeper-1-node.yaml | 4 ++--
 .../zookeeper-3-nodes-1GB-for-tests-only-scaleout-pvc.yaml | 2 +-
 .../zookeeper-3-nodes-1GB-for-tests-only.yaml | 4 ++--
 .../zookeeper-3-nodes-for-test-probes.yaml | 3 ++-
 .../quick-start-persistent-volume/zookeeper-3-nodes.yaml | 4 ++--
 .../quick-start-volume-emptyDir/zookeeper-1-node.yaml | 4 ++--
 .../quick-start-volume-emptyDir/zookeeper-3-nodes.yaml | 4 ++--
 .../zookeeper-operator-1-node-with-custom-probes.yaml | 2 +-
 .../zookeeper-operator-3-nodes-with-custom-probes.yaml | 2 +-
 15 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/deploy/zookeeper/zookeeper-manually/advanced/05-stateful-set-persistent-volume.yaml b/deploy/zookeeper/zookeeper-manually/advanced/05-stateful-set-persistent-volume.yaml
index 1a8848de1..9deb903fc 100644
--- a/deploy/zookeeper/zookeeper-manually/advanced/05-stateful-set-persistent-volume.yaml
+++ b/deploy/zookeeper/zookeeper-manually/advanced/05-stateful-set-persistent-volume.yaml
@@ -132,7 +132,8 @@ spec:
           - bash
           - -c
           - "OK=$(echo ruok 
| nc 127.0.0.1 2181); if [[ \"$OK\" == \"imok\" ]]; then exit 0; else exit 1; fi" - initialDelaySeconds: 10 + initialDelaySeconds: 15 + periodSeconds: 10 timeoutSeconds: 5 livenessProbe: exec: diff --git a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only-scaleout-pvc-secure.yaml b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only-scaleout-pvc-secure.yaml index f2385fcbb..5a0d989d7 100644 --- a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only-scaleout-pvc-secure.yaml +++ b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only-scaleout-pvc-secure.yaml @@ -613,7 +613,7 @@ spec: command: - /conf/zookeeperReady.sh failureThreshold: 3 - initialDelaySeconds: 10 + initialDelaySeconds: 15 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 10 diff --git a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only-scaleout-pvc.yaml b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only-scaleout-pvc.yaml index 9a40392eb..8fd35b843 100644 --- a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only-scaleout-pvc.yaml +++ b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only-scaleout-pvc.yaml @@ -566,7 +566,7 @@ spec: command: - /conf/zookeeperReady.sh failureThreshold: 3 - initialDelaySeconds: 10 + initialDelaySeconds: 15 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 10 diff --git a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only.yaml b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only.yaml index 0aa1e7918..ba5b2e919 100644 --- a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only.yaml +++ b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only.yaml @@ -211,8 +211,8 @@ spec: exit 1; fi ' - initialDelaySeconds: 10 - periodSeconds: 60 + initialDelaySeconds: 15 + periodSeconds: 10 timeoutSeconds: 60 livenessProbe: exec: diff --git a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-for-test-probes.yaml b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-for-test-probes.yaml index 634c490ce..e45e7b6b2 100644 --- a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-for-test-probes.yaml +++ b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-for-test-probes.yaml @@ -193,7 +193,8 @@ spec: # - bash # - -xc # - 'OK=$(exec 3<>/dev/tcp/127.0.0.1/2181 ; printf "ruok" >&3 ; IFS=; tee <&3; exec 3<&- ;); if [[ "$OK" == "imok" ]]; then exit 0; else exit 1; fi' -# initialDelaySeconds: 20 +# initialDelaySeconds: 15 +# periodSeconds: 10 # timeoutSeconds: 15 livenessProbe: exec: diff --git a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node.yaml b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node.yaml index c24f27789..7e04d8ebf 100644 --- a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node.yaml +++ b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node.yaml @@ -230,8 
+230,8 @@ spec: exit 1; fi ' - initialDelaySeconds: 10 - periodSeconds: 60 + initialDelaySeconds: 15 + periodSeconds: 10 timeoutSeconds: 60 livenessProbe: exec: diff --git a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-1GB-for-tests-only-scaleout-pvc.yaml b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-1GB-for-tests-only-scaleout-pvc.yaml index 9044829dc..fcd77114c 100644 --- a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-1GB-for-tests-only-scaleout-pvc.yaml +++ b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-1GB-for-tests-only-scaleout-pvc.yaml @@ -566,7 +566,7 @@ spec: command: - /conf/zookeeperReady.sh failureThreshold: 3 - initialDelaySeconds: 10 + initialDelaySeconds: 15 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 10 diff --git a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-1GB-for-tests-only.yaml b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-1GB-for-tests-only.yaml index 68bf22703..dad2d73b6 100644 --- a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-1GB-for-tests-only.yaml +++ b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-1GB-for-tests-only.yaml @@ -211,8 +211,8 @@ spec: exit 1; fi ' - initialDelaySeconds: 10 - periodSeconds: 60 + initialDelaySeconds: 15 + periodSeconds: 10 timeoutSeconds: 60 livenessProbe: exec: diff --git a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-for-test-probes.yaml b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-for-test-probes.yaml index 8d043a0bc..9cd33ccd9 100644 --- a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-for-test-probes.yaml +++ b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-for-test-probes.yaml @@ -193,7 +193,8 @@ spec: # - bash # - -xc # - 'OK=$(exec 3<>/dev/tcp/127.0.0.1/2181 ; printf "ruok" >&3 ; IFS=; tee <&3; exec 3<&- ;); if [[ "$OK" == "imok" ]]; then exit 0; else exit 1; fi' -# initialDelaySeconds: 20 +# initialDelaySeconds: 15 +# periodSeconds: 10 # timeoutSeconds: 15 livenessProbe: exec: diff --git a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes.yaml b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes.yaml index 2ab53183f..85f254c8b 100644 --- a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes.yaml +++ b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes.yaml @@ -230,8 +230,8 @@ spec: exit 1; fi ' - initialDelaySeconds: 10 - periodSeconds: 60 + initialDelaySeconds: 15 + periodSeconds: 10 timeoutSeconds: 60 livenessProbe: exec: diff --git a/deploy/zookeeper/zookeeper-manually/quick-start-volume-emptyDir/zookeeper-1-node.yaml b/deploy/zookeeper/zookeeper-manually/quick-start-volume-emptyDir/zookeeper-1-node.yaml index 247af99b7..a0a66bfcc 100644 --- a/deploy/zookeeper/zookeeper-manually/quick-start-volume-emptyDir/zookeeper-1-node.yaml +++ b/deploy/zookeeper/zookeeper-manually/quick-start-volume-emptyDir/zookeeper-1-node.yaml @@ -230,8 +230,8 @@ spec: exit 1; fi ' - initialDelaySeconds: 10 - periodSeconds: 60 + initialDelaySeconds: 15 + periodSeconds: 10 timeoutSeconds: 60 livenessProbe: exec: diff --git 
a/deploy/zookeeper/zookeeper-manually/quick-start-volume-emptyDir/zookeeper-3-nodes.yaml b/deploy/zookeeper/zookeeper-manually/quick-start-volume-emptyDir/zookeeper-3-nodes.yaml
index e4dece68c..3d66ebb19 100644
--- a/deploy/zookeeper/zookeeper-manually/quick-start-volume-emptyDir/zookeeper-3-nodes.yaml
+++ b/deploy/zookeeper/zookeeper-manually/quick-start-volume-emptyDir/zookeeper-3-nodes.yaml
@@ -230,8 +230,8 @@ spec:
               exit 1;
               fi
               '
-          initialDelaySeconds: 10
-          periodSeconds: 60
+          initialDelaySeconds: 15
+          periodSeconds: 10
           timeoutSeconds: 60
         livenessProbe:
           exec:

diff --git a/deploy/zookeeper/zookeeper-with-zookeeper-operator/zookeeper-operator-1-node-with-custom-probes.yaml b/deploy/zookeeper/zookeeper-with-zookeeper-operator/zookeeper-operator-1-node-with-custom-probes.yaml
index 3a8d95a3c..8031059e4 100644
--- a/deploy/zookeeper/zookeeper-with-zookeeper-operator/zookeeper-operator-1-node-with-custom-probes.yaml
+++ b/deploy/zookeeper/zookeeper-with-zookeeper-operator/zookeeper-operator-1-node-with-custom-probes.yaml
@@ -474,7 +474,7 @@ spec:
           command:
             - /conf/zookeeperReady.sh
           failureThreshold: 3
-          initialDelaySeconds: 10
+          initialDelaySeconds: 15
           periodSeconds: 10
           successThreshold: 1
           timeoutSeconds: 10

diff --git a/deploy/zookeeper/zookeeper-with-zookeeper-operator/zookeeper-operator-3-nodes-with-custom-probes.yaml b/deploy/zookeeper/zookeeper-with-zookeeper-operator/zookeeper-operator-3-nodes-with-custom-probes.yaml
index b2268a8f2..f9335157a 100644
--- a/deploy/zookeeper/zookeeper-with-zookeeper-operator/zookeeper-operator-3-nodes-with-custom-probes.yaml
+++ b/deploy/zookeeper/zookeeper-with-zookeeper-operator/zookeeper-operator-3-nodes-with-custom-probes.yaml
@@ -474,7 +474,7 @@ spec:
           command:
             - /conf/zookeeperReady.sh
           failureThreshold: 3
-          initialDelaySeconds: 10
+          initialDelaySeconds: 15
           periodSeconds: 10
           successThreshold: 1
           timeoutSeconds: 10
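
The same tuned probe, expressed with k8s.io/api/core/v1 for anyone templating these manifests from Go — a sketch only; the values mirror this patch and the variable name is illustrative:

```go
package zk

import (
	core "k8s.io/api/core/v1"
)

// ZKReadinessProbe mirrors the tuned probe: start checking after 15s and
// re-check every 10s, so a restarted node re-enters the Service endpoints
// soon after quorum is re-established instead of waiting out a long period.
var ZKReadinessProbe = &core.Probe{
	ProbeHandler: core.ProbeHandler{
		Exec: &core.ExecAction{
			Command: []string{
				"bash",
				"-c",
				`OK=$(echo ruok | nc 127.0.0.1 2181); if [[ "$OK" == "imok" ]]; then exit 0; else exit 1; fi`,
			},
		},
	},
	InitialDelaySeconds: 15,
	PeriodSeconds:       10,
	TimeoutSeconds:      5,
}
```
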
From 23eca95d274a8421ceaeed9f2de7edd89f2ea9a3 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Tue, 7 Jan 2025 14:20:33 +0300
Subject: [PATCH 069/161] dev: add storage to list

---
 pkg/model/common/tags/labeler/list.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/model/common/tags/labeler/list.go b/pkg/model/common/tags/labeler/list.go
index 2f2ee1109..e8d6de179 100644
--- a/pkg/model/common/tags/labeler/list.go
+++ b/pkg/model/common/tags/labeler/list.go
@@ -33,6 +33,7 @@ const (
 	LabelReplicaName                 = "APIGroupName" + "/" + "replica"
 	LabelConfigMap                   = "APIGroupName" + "/" + "ConfigMap"
 	LabelConfigMapValueCRCommon      = "CRCommon"
+	LabelConfigMapValueCRStorage     = "CRStorage"
 	LabelConfigMapValueCRCommonUsers = "CRCommonUsers"
 	LabelConfigMapValueHost          = "Host"
 	LabelService                     = "APIGroupName" + "/" + "Service"

From c8636757a410c63bd1733436b650f7e898a6b756 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Tue, 7 Jan 2025 14:20:59 +0300
Subject: [PATCH 070/161] dev: add chi storage

---
 pkg/model/chi/tags/labeler/list.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/model/chi/tags/labeler/list.go b/pkg/model/chi/tags/labeler/list.go
index b2636cf45..c60f036b5 100644
--- a/pkg/model/chi/tags/labeler/list.go
+++ b/pkg/model/chi/tags/labeler/list.go
@@ -39,6 +39,7 @@ var list = types.List{
 	labeler.LabelReplicaName:                 clickhouse_altinity_com.APIGroupName + "/" + "replica",
 	labeler.LabelConfigMap:                   clickhouse_altinity_com.APIGroupName + "/" + "ConfigMap",
 	labeler.LabelConfigMapValueCRCommon:      "ChiCommon",
+	labeler.LabelConfigMapValueCRStorage:     "ChiStorage",
 	labeler.LabelConfigMapValueCRCommonUsers: "ChiCommonUsers",
 	labeler.LabelConfigMapValueHost:          "Host",
 	labeler.LabelService:                     clickhouse_altinity_com.APIGroupName + "/" + "Service",

From a593a856ed4aa4abf6dede7cb2e2d3bc11426f69 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Tue, 7 Jan 2025 14:21:36 +0300
Subject: [PATCH 071/161] dev: add cm storage label

---
 pkg/interfaces/label_type.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/interfaces/label_type.go b/pkg/interfaces/label_type.go
index 2db79e4a0..6d725922c 100644
--- a/pkg/interfaces/label_type.go
+++ b/pkg/interfaces/label_type.go
@@ -20,6 +20,7 @@ const (
 	LabelConfigMapCommon      LabelType = "Label cm common"
 	LabelConfigMapCommonUsers LabelType = "Label cm common users"
 	LabelConfigMapHost        LabelType = "Label cm host"
+	LabelConfigMapStorage     LabelType = "Label cm storage"
 )
 
 const (

From d8d2fc381f68952d165f715d642692145a1566ad Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Tue, 7 Jan 2025 14:21:57 +0300
Subject: [PATCH 072/161] dev: add storage annotation

---
 pkg/interfaces/annotate_type.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/interfaces/annotate_type.go b/pkg/interfaces/annotate_type.go
index f29166726..563b6ad6f 100644
--- a/pkg/interfaces/annotate_type.go
+++ b/pkg/interfaces/annotate_type.go
@@ -37,4 +37,5 @@ const (
 	AnnotateConfigMapCommon      AnnotateType = "annotate cm common"
 	AnnotateConfigMapCommonUsers AnnotateType = "annotate cm common users"
 	AnnotateConfigMapHost        AnnotateType = "annotate cm host"
+	AnnotateConfigMapStorage     AnnotateType = "annotate cm starage"
 )

From 9532e5a3fc53a253af95d0d65d935535f5e8d6d1 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Tue, 7 Jan 2025 14:22:22 +0300
Subject: [PATCH 073/161] dev: add storage labeler

---
 pkg/model/chi/tags/labeler/labeler.go | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/pkg/model/chi/tags/labeler/labeler.go b/pkg/model/chi/tags/labeler/labeler.go
index 02b0fbb4f..35f0831ea 100644
--- a/pkg/model/chi/tags/labeler/labeler.go
+++ b/pkg/model/chi/tags/labeler/labeler.go
@@ -41,6 +41,8 @@ func (l *Labeler) Label(what interfaces.LabelType, params ...any) map[string]str
 		return l.labelConfigMapCRCommonUsers()
 	case interfaces.LabelConfigMapHost:
 		return l.labelConfigMapHost(params...)
+	case interfaces.LabelConfigMapStorage:
+		return l.labelConfigMapCRStorage()
 	default:
 		return l.Labeler.Label(what, params...)
@@ -79,6 +81,15 @@ func (l *Labeler) labelConfigMapHost(params ...any) map[string]string { panic("not enough params for labeler") } +// labelConfigMapCRStorage +func (l *Labeler) labelConfigMapCRStorage() map[string]string { + return util.MergeStringMapsOverwrite( + l.GetCRScope(), + map[string]string{ + l.Get(labeler.LabelConfigMap): l.Get(labeler.LabelConfigMapValueCRStorage), + }) +} + // _labelConfigMapHost func (l *Labeler) _labelConfigMapHost(host *api.Host) map[string]string { return util.MergeStringMapsOverwrite( From 2fe274fabaceef603300ab50c4cb6dc10e65fcc6 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 7 Jan 2025 14:22:40 +0300 Subject: [PATCH 074/161] dev: add storage annotator --- pkg/model/chi/tags/annotator/annotator.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/model/chi/tags/annotator/annotator.go b/pkg/model/chi/tags/annotator/annotator.go index 89dcd0127..7c1a044e2 100644 --- a/pkg/model/chi/tags/annotator/annotator.go +++ b/pkg/model/chi/tags/annotator/annotator.go @@ -38,6 +38,8 @@ func (a *Annotator) Annotate(what interfaces.AnnotateType, params ...any) map[st switch what { case interfaces.AnnotateConfigMapCommon: return a.GetCRScope() + case interfaces.AnnotateConfigMapStorage: + return a.GetCRScope() case interfaces.AnnotateConfigMapCommonUsers: return a.GetCRScope() case interfaces.AnnotateConfigMapHost: From 81790d435961f94c24221ed94fde42ffa99290a4 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 7 Jan 2025 14:23:09 +0300 Subject: [PATCH 075/161] dev: label and annotate storage --- pkg/controller/chi/kube/cr.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pkg/controller/chi/kube/cr.go b/pkg/controller/chi/kube/cr.go index 2732d743e..5ffb5abeb 100644 --- a/pkg/controller/chi/kube/cr.go +++ b/pkg/controller/chi/kube/cr.go @@ -28,6 +28,10 @@ import ( log "github.com/altinity/clickhouse-operator/pkg/announcer" api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" commonTypes "github.com/altinity/clickhouse-operator/pkg/apis/common/types" + "github.com/altinity/clickhouse-operator/pkg/interfaces" + commonMacro "github.com/altinity/clickhouse-operator/pkg/model/common/macro" + "github.com/altinity/clickhouse-operator/pkg/model/managers" + "github.com/altinity/clickhouse-operator/pkg/model/chi/macro" chopClientSet "github.com/altinity/clickhouse-operator/pkg/client/clientset/versioned" "github.com/altinity/clickhouse-operator/pkg/controller" "github.com/altinity/clickhouse-operator/pkg/util" @@ -36,12 +40,14 @@ import ( type CR struct { chopClient chopClientSet.Interface kubeClient kube.Interface + macro interfaces.IMacro } func NewCR(chopClient chopClientSet.Interface, kubeClient kube.Interface) *CR { return &CR{ chopClient: chopClient, kubeClient: kubeClient, + macro: commonMacro.New(macro.List), } } @@ -198,10 +204,14 @@ func (c *CR) buildResources(chi *api.ClickHouseInstallation) (*api.ClickHouseIns if chi.Status.NormalizedCRCompleted != nil { normalizedCompleted, _ = json.Marshal(chi.Status.NormalizedCRCompleted) } + + tagger := managers.NewTagManager(managers.TagManagerTypeClickHouse, chi) cm := &core.ConfigMap{ ObjectMeta: meta.ObjectMeta{ Namespace: c.buildCMNamespace(chi), Name: c.buildCMName(chi), + Labels: c.macro.Scope(chi).Map(tagger.Label(interfaces.LabelConfigMapStorage)), + Annotations: c.macro.Scope(chi).Map(tagger.Annotate(interfaces.AnnotateConfigMapStorage)), }, Data: map[string]string{ statusNormalized: string(normalized), From e3328af905e8dec9db5fc24141cce949d714691d 
Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 7 Jan 2025 14:23:24 +0300 Subject: [PATCH 076/161] dev: format --- pkg/controller/chi/kube/cr.go | 18 +++++++------- pkg/controller/chi/worker-chi-reconciler.go | 26 ++++++++++----------- pkg/interfaces/annotate_type.go | 2 +- pkg/interfaces/label_type.go | 2 +- pkg/model/chi/tags/labeler/list.go | 2 +- pkg/model/common/tags/labeler/list.go | 2 +- 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/pkg/controller/chi/kube/cr.go b/pkg/controller/chi/kube/cr.go index 5ffb5abeb..5f47554b6 100644 --- a/pkg/controller/chi/kube/cr.go +++ b/pkg/controller/chi/kube/cr.go @@ -28,26 +28,26 @@ import ( log "github.com/altinity/clickhouse-operator/pkg/announcer" api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" commonTypes "github.com/altinity/clickhouse-operator/pkg/apis/common/types" + chopClientSet "github.com/altinity/clickhouse-operator/pkg/client/clientset/versioned" + "github.com/altinity/clickhouse-operator/pkg/controller" "github.com/altinity/clickhouse-operator/pkg/interfaces" + "github.com/altinity/clickhouse-operator/pkg/model/chi/macro" commonMacro "github.com/altinity/clickhouse-operator/pkg/model/common/macro" "github.com/altinity/clickhouse-operator/pkg/model/managers" - "github.com/altinity/clickhouse-operator/pkg/model/chi/macro" - chopClientSet "github.com/altinity/clickhouse-operator/pkg/client/clientset/versioned" - "github.com/altinity/clickhouse-operator/pkg/controller" "github.com/altinity/clickhouse-operator/pkg/util" ) type CR struct { chopClient chopClientSet.Interface kubeClient kube.Interface - macro interfaces.IMacro + macro interfaces.IMacro } func NewCR(chopClient chopClientSet.Interface, kubeClient kube.Interface) *CR { return &CR{ chopClient: chopClient, kubeClient: kubeClient, - macro: commonMacro.New(macro.List), + macro: commonMacro.New(macro.List), } } @@ -208,10 +208,10 @@ func (c *CR) buildResources(chi *api.ClickHouseInstallation) (*api.ClickHouseIns tagger := managers.NewTagManager(managers.TagManagerTypeClickHouse, chi) cm := &core.ConfigMap{ ObjectMeta: meta.ObjectMeta{ - Namespace: c.buildCMNamespace(chi), - Name: c.buildCMName(chi), - Labels: c.macro.Scope(chi).Map(tagger.Label(interfaces.LabelConfigMapStorage)), - Annotations: c.macro.Scope(chi).Map(tagger.Annotate(interfaces.AnnotateConfigMapStorage)), + Namespace: c.buildCMNamespace(chi), + Name: c.buildCMName(chi), + Labels: c.macro.Scope(chi).Map(tagger.Label(interfaces.LabelConfigMapStorage)), + Annotations: c.macro.Scope(chi).Map(tagger.Annotate(interfaces.AnnotateConfigMapStorage)), }, Data: map[string]string{ statusNormalized: string(normalized), diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go index 558fc5877..d7432b6f7 100644 --- a/pkg/controller/chi/worker-chi-reconciler.go +++ b/pkg/controller/chi/worker-chi-reconciler.go @@ -719,19 +719,19 @@ func (w *worker) reconcileHostPrepare(ctx context.Context, host *api.Host) error // Check whether ClickHouse is running and accessible and what version is available // alz 18.12.2024: Host may be down or not accessible, so no reason to wait -// if version, err := w.getHostClickHouseVersion(ctx, host, versionOptions{skipNew: true, skipStoppedAncestor: true}); err == nil { -// w.a.V(1). -// WithEvent(host.GetCR(), a.EventActionReconcile, a.EventReasonReconcileStarted). -// WithAction(host.GetCR()). -// M(host).F(). -// Info("Reconcile Host start. 
Host: %s ClickHouse version running: %s", host.GetName(), version) -// } else { -// w.a.V(1). -// WithEvent(host.GetCR(), a.EventActionReconcile, a.EventReasonReconcileStarted). -// WithAction(host.GetCR()). -// M(host).F(). -// Warning("Reconcile Host start. Host: %s Failed to get ClickHouse version: %s", host.GetName(), version) -// } + // if version, err := w.getHostClickHouseVersion(ctx, host, versionOptions{skipNew: true, skipStoppedAncestor: true}); err == nil { + // w.a.V(1). + // WithEvent(host.GetCR(), a.EventActionReconcile, a.EventReasonReconcileStarted). + // WithAction(host.GetCR()). + // M(host).F(). + // Info("Reconcile Host start. Host: %s ClickHouse version running: %s", host.GetName(), version) + // } else { + // w.a.V(1). + // WithEvent(host.GetCR(), a.EventActionReconcile, a.EventReasonReconcileStarted). + // WithAction(host.GetCR()). + // M(host).F(). + // Warning("Reconcile Host start. Host: %s Failed to get ClickHouse version: %s", host.GetName(), version) + // } if w.excludeHost(ctx, host) { // Need to wait to complete queries only in case host is excluded from the cluster diff --git a/pkg/interfaces/annotate_type.go b/pkg/interfaces/annotate_type.go index 563b6ad6f..bf02a4903 100644 --- a/pkg/interfaces/annotate_type.go +++ b/pkg/interfaces/annotate_type.go @@ -37,5 +37,5 @@ const ( AnnotateConfigMapCommon AnnotateType = "annotate cm common" AnnotateConfigMapCommonUsers AnnotateType = "annotate cm common users" AnnotateConfigMapHost AnnotateType = "annotate cm host" - AnnotateConfigMapStorage AnnotateType = "annotate cm starage" + AnnotateConfigMapStorage AnnotateType = "annotate cm starage" ) diff --git a/pkg/interfaces/label_type.go b/pkg/interfaces/label_type.go index 6d725922c..184de701d 100644 --- a/pkg/interfaces/label_type.go +++ b/pkg/interfaces/label_type.go @@ -20,7 +20,7 @@ const ( LabelConfigMapCommon LabelType = "Label cm common" LabelConfigMapCommonUsers LabelType = "Label cm common users" LabelConfigMapHost LabelType = "Label cm host" - LabelConfigMapStorage LabelType = "Label cm storage" + LabelConfigMapStorage LabelType = "Label cm storage" ) const ( diff --git a/pkg/model/chi/tags/labeler/list.go b/pkg/model/chi/tags/labeler/list.go index c60f036b5..2bf27b5e7 100644 --- a/pkg/model/chi/tags/labeler/list.go +++ b/pkg/model/chi/tags/labeler/list.go @@ -39,7 +39,7 @@ var list = types.List{ labeler.LabelReplicaName: clickhouse_altinity_com.APIGroupName + "/" + "replica", labeler.LabelConfigMap: clickhouse_altinity_com.APIGroupName + "/" + "ConfigMap", labeler.LabelConfigMapValueCRCommon: "ChiCommon", - labeler.LabelConfigMapValueCRStorage: "ChiStorage", + labeler.LabelConfigMapValueCRStorage: "ChiStorage", labeler.LabelConfigMapValueCRCommonUsers: "ChiCommonUsers", labeler.LabelConfigMapValueHost: "Host", labeler.LabelService: clickhouse_altinity_com.APIGroupName + "/" + "Service", diff --git a/pkg/model/common/tags/labeler/list.go b/pkg/model/common/tags/labeler/list.go index e8d6de179..250d1bfb4 100644 --- a/pkg/model/common/tags/labeler/list.go +++ b/pkg/model/common/tags/labeler/list.go @@ -33,7 +33,7 @@ const ( LabelReplicaName = "APIGroupName" + "/" + "replica" LabelConfigMap = "APIGroupName" + "/" + "ConfigMap" LabelConfigMapValueCRCommon = "CRCommon" - LabelConfigMapValueCRStorage = "CRStorage" + LabelConfigMapValueCRStorage = "CRStorage" LabelConfigMapValueCRCommonUsers = "CRCommonUsers" LabelConfigMapValueHost = "Host" LabelService = "APIGroupName" + "/" + "Service" From c2b2b23d915693afad5d5eedd6413a2a07ceab22 Mon Sep 17 00:00:00 2001 From: 
Vladislav Klimenko Date: Tue, 7 Jan 2025 14:25:08 +0300 Subject: [PATCH 077/161] dev: naming --- pkg/interfaces/{annotate_type.go => annotate.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pkg/interfaces/{annotate_type.go => annotate.go} (100%) diff --git a/pkg/interfaces/annotate_type.go b/pkg/interfaces/annotate.go similarity index 100% rename from pkg/interfaces/annotate_type.go rename to pkg/interfaces/annotate.go From c1bffc4fa1278f08732f0e7557345327589f79f7 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Wed, 8 Jan 2025 13:44:30 +0300 Subject: [PATCH 078/161] dev: naming --- pkg/interfaces/{cluster_type.go => cluster.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pkg/interfaces/{cluster_type.go => cluster.go} (100%) diff --git a/pkg/interfaces/cluster_type.go b/pkg/interfaces/cluster.go similarity index 100% rename from pkg/interfaces/cluster_type.go rename to pkg/interfaces/cluster.go From 9bd3d7bd3ab227f76be8ff388e086da50cb1d300 Mon Sep 17 00:00:00 2001 From: Eugene Klimov Date: Wed, 8 Jan 2025 15:45:17 +0500 Subject: [PATCH 079/161] remove runningDifference from grafana dashboards to avoid errors in modern clickhouse versions, upgrade plugin versions (#1609) remove runningDifference from grafana dashboards --- .../grafana-dashboard-operator-cr-template.yaml | 4 ++-- .../grafana-dashboard-queries-cr-template.yaml | 2 +- .../files/ClickHouse_Queries_dashboard.json | 6 +++--- grafana-dashboard/ClickHouse_Queries_dashboard.json | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/deploy/grafana/grafana-with-grafana-operator/grafana-dashboard-operator-cr-template.yaml b/deploy/grafana/grafana-with-grafana-operator/grafana-dashboard-operator-cr-template.yaml index 8c3464771..6dc2246e2 100644 --- a/deploy/grafana/grafana-with-grafana-operator/grafana-dashboard-operator-cr-template.yaml +++ b/deploy/grafana/grafana-with-grafana-operator/grafana-dashboard-operator-cr-template.yaml @@ -14,6 +14,6 @@ spec: # TODO remove this plugin definition after resolve https://github.com/integr8ly/grafana-operator/issues/155 plugins: - name: "vertamedia-clickhouse-datasource" - version: "2.5.4" + version: "3.3.1" - name: "grafana-piechart-panel" - version: "1.6.2" + version: "1.6.4" diff --git a/deploy/grafana/grafana-with-grafana-operator/grafana-dashboard-queries-cr-template.yaml b/deploy/grafana/grafana-with-grafana-operator/grafana-dashboard-queries-cr-template.yaml index a0f4f9a31..fcbb96762 100644 --- a/deploy/grafana/grafana-with-grafana-operator/grafana-dashboard-queries-cr-template.yaml +++ b/deploy/grafana/grafana-with-grafana-operator/grafana-dashboard-queries-cr-template.yaml @@ -13,4 +13,4 @@ spec: datasourceName: "$GRAFANA_PROMETHEUS_DATASOURCE_NAME" plugins: - name: "vertamedia-clickhouse-datasource" - version: "2.5.4" + version: "3.3.1" diff --git a/deploy/helm/clickhouse-operator/files/ClickHouse_Queries_dashboard.json b/deploy/helm/clickhouse-operator/files/ClickHouse_Queries_dashboard.json index f348932a8..ee27a17ed 100644 --- a/deploy/helm/clickhouse-operator/files/ClickHouse_Queries_dashboard.json +++ b/deploy/helm/clickhouse-operator/files/ClickHouse_Queries_dashboard.json @@ -147,8 +147,8 @@ "format": "time_series", "interval": "", "intervalFactor": 2, - "query": "SELECT\r\n t,\r\n arrayMap(a -> (a.1, a.2 / runningDifference(t / 1000)), groupArr)\r\nFROM (\r\n SELECT t, groupArray((q, c)) AS groupArr\r\n FROM (\r\n SELECT\r\n (intDiv(toUInt32(event_time), 2) * 2) * 1000 AS t,\r\n normalizeQuery(query) AS q,\r\n count() 
c\r\n FROM cluster('all-sharded',system.query_log)\r\n WHERE $timeFilter\r\n AND( ('$type' = '1,2,3,4' AND type != 'QueryStart') OR ('$type' != '1,2,3,4' AND type IN ($type)))\r\n $conditionalTest(AND query_kind IN ($query_kind), $query_kind)\r\n $conditionalTest(AND initial_user IN ($user), $user)\r\n $conditionalTest(AND query_duration_ms >= $min_duration_ms, $min_duration_ms)\r\n $conditionalTest(AND query_duration_ms <= $max_duration_ms, $max_duration_ms)\r\n AND normalized_query_hash GLOBAL IN (\r\n SELECT normalized_query_hash AS h\r\n FROM cluster('all-sharded',system.query_log)\r\n WHERE $timeFilter\r\n AND( ('$type' = '1,2,3,4' AND type != 'QueryStart') OR ('$type' != '1,2,3,4' AND type IN ($type)))\r\n $conditionalTest(AND query_kind IN ($query_kind), $query_kind)\r\n $conditionalTest(AND type IN ($type), $type)\r\n $conditionalTest(AND initial_user IN ($user), $user)\r\n $conditionalTest(AND query_duration_ms >= $min_duration_ms, $min_duration_ms)\r\n $conditionalTest(AND query_duration_ms <= $max_duration_ms, $max_duration_ms)\r\n GROUP BY h\r\n ORDER BY count() DESC\r\n LIMIT $top\r\n SETTINGS skip_unavailable_shards=1\r\n )\r\n GROUP BY t, query\r\n ORDER BY t\r\n )\r\n GROUP BY t\r\n ORDER BY t\r\n) SETTINGS skip_unavailable_shards=1", - "rawQuery": "SELECT\r\n t,\r\n arrayMap(a -> (a.1, a.2 / runningDifference(t / 1000)), groupArr)\r\nFROM (\r\n SELECT t, groupArray((q, c)) AS groupArr\r\n FROM (\r\n SELECT\r\n (intDiv(toUInt32(event_time), 2) * 2) * 1000 AS t,\r\n normalizeQuery(query) AS q,\r\n count() c\r\n FROM cluster('all-sharded',system.query_log)\r\n WHERE event_date >= toDate(1694531137) AND event_date <= toDate(1694534737) AND event_time >= toDateTime(1694531137) AND event_time <= toDateTime(1694534737)\r\n AND( ('1,2,3,4' = '1,2,3,4' AND type != 'QueryStart') OR ('1,2,3,4' != '1,2,3,4' AND type IN (1,2,3,4)))\r\n \r\n \r\n \r\n \r\n AND normalized_query_hash GLOBAL IN (\r\n SELECT normalized_query_hash AS h\r\n FROM cluster('all-sharded',system.query_log)\r\n WHERE event_date >= toDate(1694531137) AND event_date <= toDate(1694534737) AND event_time >= toDateTime(1694531137) AND event_time <= toDateTime(1694534737)\r\n AND( ('1,2,3,4' = '1,2,3,4' AND type != 'QueryStart') OR ('1,2,3,4' != '1,2,3,4' AND type IN (1,2,3,4)))\r\n \r\n \r\n \r\n \r\n \r\n GROUP BY h\r\n ORDER BY count() DESC\r\n LIMIT 30\r\n SETTINGS skip_unavailable_shards=1\r\n )\r\n GROUP BY t, query\r\n ORDER BY t\r\n )\r\n GROUP BY t\r\n ORDER BY t\r\n) SETTINGS skip_unavailable_shards=1", + "query": "SELECT\r\n t,\r\n arrayMap(a -> (a.1, a.2 / (t/1000 - lagInFrame(t/1000,1,0) OVER ()) ), groupArr)\r\nFROM (\r\n SELECT t, groupArray((q, c)) AS groupArr\r\n FROM (\r\n SELECT\r\n (intDiv(toUInt32(event_time), 2) * 2) * 1000 AS t,\r\n normalizeQuery(query) AS q,\r\n count() c\r\n FROM cluster('all-sharded',system.query_log)\r\n WHERE $timeFilter\r\n AND( ('$type' = '1,2,3,4' AND type != 'QueryStart') OR ('$type' != '1,2,3,4' AND type IN ($type)))\r\n $conditionalTest(AND query_kind IN ($query_kind), $query_kind)\r\n $conditionalTest(AND initial_user IN ($user), $user)\r\n $conditionalTest(AND query_duration_ms >= $min_duration_ms, $min_duration_ms)\r\n $conditionalTest(AND query_duration_ms <= $max_duration_ms, $max_duration_ms)\r\n AND normalized_query_hash GLOBAL IN (\r\n SELECT normalized_query_hash AS h\r\n FROM cluster('all-sharded',system.query_log)\r\n WHERE $timeFilter\r\n AND( ('$type' = '1,2,3,4' AND type != 'QueryStart') OR ('$type' != '1,2,3,4' AND type IN ($type)))\r\n 
$conditionalTest(AND query_kind IN ($query_kind), $query_kind)\r\n $conditionalTest(AND type IN ($type), $type)\r\n $conditionalTest(AND initial_user IN ($user), $user)\r\n $conditionalTest(AND query_duration_ms >= $min_duration_ms, $min_duration_ms)\r\n $conditionalTest(AND query_duration_ms <= $max_duration_ms, $max_duration_ms)\r\n GROUP BY h\r\n ORDER BY count() DESC\r\n LIMIT $top\r\n SETTINGS skip_unavailable_shards=1\r\n )\r\n GROUP BY t, query\r\n ORDER BY t\r\n )\r\n GROUP BY t\r\n ORDER BY t\r\n) SETTINGS skip_unavailable_shards=1", + "rawQuery": "SELECT\r\n t,\r\n arrayMap(a -> (a.1, a.2 / (t/1000 - lagInFrame(t/1000,1,0) OVER ()) ), groupArr)\r\nFROM (\r\n SELECT t, groupArray((q, c)) AS groupArr\r\n FROM (\r\n SELECT\r\n (intDiv(toUInt32(event_time), 2) * 2) * 1000 AS t,\r\n normalizeQuery(query) AS q,\r\n count() c\r\n FROM cluster('all-sharded',system.query_log)\r\n WHERE event_date >= toDate(1694531137) AND event_date <= toDate(1694534737) AND event_time >= toDateTime(1694531137) AND event_time <= toDateTime(1694534737)\r\n AND( ('1,2,3,4' = '1,2,3,4' AND type != 'QueryStart') OR ('1,2,3,4' != '1,2,3,4' AND type IN (1,2,3,4)))\r\n \r\n \r\n \r\n \r\n AND normalized_query_hash GLOBAL IN (\r\n SELECT normalized_query_hash AS h\r\n FROM cluster('all-sharded',system.query_log)\r\n WHERE event_date >= toDate(1694531137) AND event_date <= toDate(1694534737) AND event_time >= toDateTime(1694531137) AND event_time <= toDateTime(1694534737)\r\n AND( ('1,2,3,4' = '1,2,3,4' AND type != 'QueryStart') OR ('1,2,3,4' != '1,2,3,4' AND type IN (1,2,3,4)))\r\n \r\n \r\n \r\n \r\n \r\n GROUP BY h\r\n ORDER BY count() DESC\r\n LIMIT 30\r\n SETTINGS skip_unavailable_shards=1\r\n )\r\n GROUP BY t, query\r\n ORDER BY t\r\n )\r\n GROUP BY t\r\n ORDER BY t\r\n) SETTINGS skip_unavailable_shards=1", "refId": "A", "resultFormat": "time_series", "round": "0s", @@ -743,7 +743,7 @@ "interval": "", "intervalFactor": 2, "query": "$rate(count() c)\nFROM cluster('all-sharded',system.query_log)\nWHERE $timeFilter\n AND( ('$type' = '1,2,3,4' AND type != 'QueryStart') OR ('$type' != '1,2,3,4' AND type IN ($type)))\n $conditionalTest(AND query_kind IN ($query_kind), $query_kind)\n $conditionalTest(AND initial_user IN ($user), $user)\n $conditionalTest(AND query_duration_ms >= $min_duration_ms,$min_duration_ms)\n $conditionalTest(AND query_duration_ms <= $max_duration_ms,$max_duration_ms)\n", - "rawQuery": "SELECT t, c/runningDifference(t/1000) cRate FROM ( SELECT (intDiv(toUInt32(event_time), 4) * 4) * 1000 AS t, count() c FROM cluster('all-sharded',system.query_log)\nWHERE event_date >= toDate(1694531229) AND event_date <= toDate(1694534829) AND event_time >= toDateTime(1694531229) AND event_time <= toDateTime(1694534829) AND event_date >= toDate(1694531229) AND event_date <= toDate(1694534829) AND event_time >= toDateTime(1694531229) AND event_time <= toDateTime(1694534829)\n AND( ('1,2,3,4' = '1,2,3,4' AND type != 'QueryStart') OR ('1,2,3,4' != '1,2,3,4' AND type IN (1,2,3,4)))\n \n \n \n GROUP BY t ORDER BY t)", + "rawQuery": "SELECT t, c/(t/1000 - lagInFrame(t/1000,1,0) OVER ()) cRate FROM ( SELECT (intDiv(toUInt32(event_time), 4) * 4) * 1000 AS t, count() c FROM cluster('all-sharded',system.query_log)\nWHERE event_date >= toDate(1694531229) AND event_date <= toDate(1694534829) AND event_time >= toDateTime(1694531229) AND event_time <= toDateTime(1694534829) AND event_date >= toDate(1694531229) AND event_date <= toDate(1694534829) AND event_time >= toDateTime(1694531229) AND event_time <= 
toDateTime(1694534829)\n AND( ('1,2,3,4' = '1,2,3,4' AND type != 'QueryStart') OR ('1,2,3,4' != '1,2,3,4' AND type IN (1,2,3,4)))\n \n \n \n GROUP BY t ORDER BY t)", "refId": "A", "resultFormat": "time_series", "round": "0s", diff --git a/grafana-dashboard/ClickHouse_Queries_dashboard.json b/grafana-dashboard/ClickHouse_Queries_dashboard.json index 20e080270..a349c6017 100644 --- a/grafana-dashboard/ClickHouse_Queries_dashboard.json +++ b/grafana-dashboard/ClickHouse_Queries_dashboard.json @@ -147,8 +147,8 @@ "format": "time_series", "interval": "", "intervalFactor": 2, - "query": "SELECT\r\n t,\r\n arrayMap(a -> (a.1, a.2 / runningDifference(t / 1000)), groupArr)\r\nFROM (\r\n SELECT t, groupArray((q, c)) AS groupArr\r\n FROM (\r\n SELECT\r\n (intDiv(toUInt32(event_time), 2) * 2) * 1000 AS t,\r\n normalizeQuery(query) AS q,\r\n count() c\r\n FROM cluster('all-sharded',system.query_log)\r\n WHERE $timeFilter\r\n AND( ('$type' = '1,2,3,4' AND type != 'QueryStart') OR ('$type' != '1,2,3,4' AND type IN ($type)))\r\n $conditionalTest(AND query_kind IN ($query_kind), $query_kind)\r\n $conditionalTest(AND initial_user IN ($user), $user)\r\n $conditionalTest(AND query_duration_ms >= $min_duration_ms, $min_duration_ms)\r\n $conditionalTest(AND query_duration_ms <= $max_duration_ms, $max_duration_ms)\r\n AND normalized_query_hash GLOBAL IN (\r\n SELECT normalized_query_hash AS h\r\n FROM cluster('all-sharded',system.query_log)\r\n WHERE $timeFilter\r\n AND( ('$type' = '1,2,3,4' AND type != 'QueryStart') OR ('$type' != '1,2,3,4' AND type IN ($type)))\r\n $conditionalTest(AND query_kind IN ($query_kind), $query_kind)\r\n $conditionalTest(AND type IN ($type), $type)\r\n $conditionalTest(AND initial_user IN ($user), $user)\r\n $conditionalTest(AND query_duration_ms >= $min_duration_ms, $min_duration_ms)\r\n $conditionalTest(AND query_duration_ms <= $max_duration_ms, $max_duration_ms)\r\n GROUP BY h\r\n ORDER BY count() DESC\r\n LIMIT $top\r\n SETTINGS skip_unavailable_shards=1\r\n )\r\n GROUP BY t, query\r\n ORDER BY t\r\n )\r\n GROUP BY t\r\n ORDER BY t\r\n) SETTINGS skip_unavailable_shards=1", - "rawQuery": "SELECT\r\n t,\r\n arrayMap(a -> (a.1, a.2 / runningDifference(t / 1000)), groupArr)\r\nFROM (\r\n SELECT t, groupArray((q, c)) AS groupArr\r\n FROM (\r\n SELECT\r\n (intDiv(toUInt32(event_time), 2) * 2) * 1000 AS t,\r\n normalizeQuery(query) AS q,\r\n count() c\r\n FROM cluster('all-sharded',system.query_log)\r\n WHERE event_date >= toDate(1694531137) AND event_date <= toDate(1694534737) AND event_time >= toDateTime(1694531137) AND event_time <= toDateTime(1694534737)\r\n AND( ('1,2,3,4' = '1,2,3,4' AND type != 'QueryStart') OR ('1,2,3,4' != '1,2,3,4' AND type IN (1,2,3,4)))\r\n \r\n \r\n \r\n \r\n AND normalized_query_hash GLOBAL IN (\r\n SELECT normalized_query_hash AS h\r\n FROM cluster('all-sharded',system.query_log)\r\n WHERE event_date >= toDate(1694531137) AND event_date <= toDate(1694534737) AND event_time >= toDateTime(1694531137) AND event_time <= toDateTime(1694534737)\r\n AND( ('1,2,3,4' = '1,2,3,4' AND type != 'QueryStart') OR ('1,2,3,4' != '1,2,3,4' AND type IN (1,2,3,4)))\r\n \r\n \r\n \r\n \r\n \r\n GROUP BY h\r\n ORDER BY count() DESC\r\n LIMIT 30\r\n SETTINGS skip_unavailable_shards=1\r\n )\r\n GROUP BY t, query\r\n ORDER BY t\r\n )\r\n GROUP BY t\r\n ORDER BY t\r\n) SETTINGS skip_unavailable_shards=1", + "query": "SELECT\r\n t,\r\n arrayMap(a -> (a.1, a.2 / (t/1000 - lagInFrame(t/1000,1,0) OVER ()) ), groupArr)\r\nFROM (\r\n SELECT t, groupArray((q, c)) AS groupArr\r\n FROM 
(\r\n SELECT\r\n (intDiv(toUInt32(event_time), 2) * 2) * 1000 AS t,\r\n normalizeQuery(query) AS q,\r\n count() c\r\n FROM cluster('all-sharded',system.query_log)\r\n WHERE $timeFilter\r\n AND( ('$type' = '1,2,3,4' AND type != 'QueryStart') OR ('$type' != '1,2,3,4' AND type IN ($type)))\r\n $conditionalTest(AND query_kind IN ($query_kind), $query_kind)\r\n $conditionalTest(AND initial_user IN ($user), $user)\r\n $conditionalTest(AND query_duration_ms >= $min_duration_ms, $min_duration_ms)\r\n $conditionalTest(AND query_duration_ms <= $max_duration_ms, $max_duration_ms)\r\n AND normalized_query_hash GLOBAL IN (\r\n SELECT normalized_query_hash AS h\r\n FROM cluster('all-sharded',system.query_log)\r\n WHERE $timeFilter\r\n AND( ('$type' = '1,2,3,4' AND type != 'QueryStart') OR ('$type' != '1,2,3,4' AND type IN ($type)))\r\n $conditionalTest(AND query_kind IN ($query_kind), $query_kind)\r\n $conditionalTest(AND type IN ($type), $type)\r\n $conditionalTest(AND initial_user IN ($user), $user)\r\n $conditionalTest(AND query_duration_ms >= $min_duration_ms, $min_duration_ms)\r\n $conditionalTest(AND query_duration_ms <= $max_duration_ms, $max_duration_ms)\r\n GROUP BY h\r\n ORDER BY count() DESC\r\n LIMIT $top\r\n SETTINGS skip_unavailable_shards=1\r\n )\r\n GROUP BY t, query\r\n ORDER BY t\r\n )\r\n GROUP BY t\r\n ORDER BY t\r\n) SETTINGS skip_unavailable_shards=1", + "rawQuery": "SELECT\r\n t,\r\n arrayMap(a -> (a.1, a.2 / (t/1000 - lagInFrame(t/1000,1,0) OVER ()) ), groupArr)\r\nFROM (\r\n SELECT t, groupArray((q, c)) AS groupArr\r\n FROM (\r\n SELECT\r\n (intDiv(toUInt32(event_time), 2) * 2) * 1000 AS t,\r\n normalizeQuery(query) AS q,\r\n count() c\r\n FROM cluster('all-sharded',system.query_log)\r\n WHERE event_date >= toDate(1694531137) AND event_date <= toDate(1694534737) AND event_time >= toDateTime(1694531137) AND event_time <= toDateTime(1694534737)\r\n AND( ('1,2,3,4' = '1,2,3,4' AND type != 'QueryStart') OR ('1,2,3,4' != '1,2,3,4' AND type IN (1,2,3,4)))\r\n \r\n \r\n \r\n \r\n AND normalized_query_hash GLOBAL IN (\r\n SELECT normalized_query_hash AS h\r\n FROM cluster('all-sharded',system.query_log)\r\n WHERE event_date >= toDate(1694531137) AND event_date <= toDate(1694534737) AND event_time >= toDateTime(1694531137) AND event_time <= toDateTime(1694534737)\r\n AND( ('1,2,3,4' = '1,2,3,4' AND type != 'QueryStart') OR ('1,2,3,4' != '1,2,3,4' AND type IN (1,2,3,4)))\r\n \r\n \r\n \r\n \r\n \r\n GROUP BY h\r\n ORDER BY count() DESC\r\n LIMIT 30\r\n SETTINGS skip_unavailable_shards=1\r\n )\r\n GROUP BY t, query\r\n ORDER BY t\r\n )\r\n GROUP BY t\r\n ORDER BY t\r\n) SETTINGS skip_unavailable_shards=1", "refId": "A", "resultFormat": "time_series", "round": "0s", @@ -743,7 +743,7 @@ "interval": "", "intervalFactor": 2, "query": "$rate(count() c)\nFROM cluster('all-sharded',system.query_log)\nWHERE $timeFilter\n AND( ('$type' = '1,2,3,4' AND type != 'QueryStart') OR ('$type' != '1,2,3,4' AND type IN ($type)))\n $conditionalTest(AND query_kind IN ($query_kind), $query_kind)\n $conditionalTest(AND initial_user IN ($user), $user)\n $conditionalTest(AND query_duration_ms >= $min_duration_ms,$min_duration_ms)\n $conditionalTest(AND query_duration_ms <= $max_duration_ms,$max_duration_ms)\n", - "rawQuery": "SELECT t, c/runningDifference(t/1000) cRate FROM ( SELECT (intDiv(toUInt32(event_time), 4) * 4) * 1000 AS t, count() c FROM cluster('all-sharded',system.query_log)\nWHERE event_date >= toDate(1694531229) AND event_date <= toDate(1694534829) AND event_time >= toDateTime(1694531229) AND 
event_time <= toDateTime(1694534829) AND event_date >= toDate(1694531229) AND event_date <= toDate(1694534829) AND event_time >= toDateTime(1694531229) AND event_time <= toDateTime(1694534829)\n AND( ('1,2,3,4' = '1,2,3,4' AND type != 'QueryStart') OR ('1,2,3,4' != '1,2,3,4' AND type IN (1,2,3,4)))\n \n \n \n GROUP BY t ORDER BY t)", + "rawQuery": "SELECT t, c/(t/1000 - lagInFrame(t/1000,1,0) OVER ()) cRate FROM ( SELECT (intDiv(toUInt32(event_time), 4) * 4) * 1000 AS t, count() c FROM cluster('all-sharded',system.query_log)\nWHERE event_date >= toDate(1694531229) AND event_date <= toDate(1694534829) AND event_time >= toDateTime(1694531229) AND event_time <= toDateTime(1694534829) AND event_date >= toDate(1694531229) AND event_date <= toDate(1694534829) AND event_time >= toDateTime(1694531229) AND event_time <= toDateTime(1694534829)\n AND( ('1,2,3,4' = '1,2,3,4' AND type != 'QueryStart') OR ('1,2,3,4' != '1,2,3,4' AND type IN (1,2,3,4)))\n \n \n \n GROUP BY t ORDER BY t)", "refId": "A", "resultFormat": "time_series", "round": "0s", From a7996b5544df1cdc9e73ae6de7e04e0d57905cb3 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Wed, 8 Jan 2025 14:08:05 +0300 Subject: [PATCH 080/161] dev: metrics labels --- pkg/metrics/operator/labels.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/metrics/operator/labels.go b/pkg/metrics/operator/labels.go index 28aff43d0..90ecd710e 100644 --- a/pkg/metrics/operator/labels.go +++ b/pkg/metrics/operator/labels.go @@ -28,21 +28,21 @@ func GetLabelsFromSource(src labelsSource) (labels map[string]string) { ) } -func getLabelsFromName(chi labelsSource) map[string]string { +func getLabelsFromName(src labelsSource) map[string]string { return map[string]string{ - "chi": chi.GetName(), - "namespace": chi.GetNamespace(), + "chi": src.GetName(), + "namespace": src.GetNamespace(), } } -func getLabelsFromLabels(chi labelsSource) map[string]string { - return chi.GetLabels() +func getLabelsFromLabels(src labelsSource) map[string]string { + return src.GetLabels() } -func getLabelsFromAnnotations(chi labelsSource) map[string]string { +func getLabelsFromAnnotations(src labelsSource) map[string]string { // Exclude skipped annotations return util.CopyMapFilter( - chi.GetAnnotations(), + src.GetAnnotations(), nil, util.ListSkippedAnnotations(), ) From 805f25b40d677a58ec6a6dff71510f924df31fc2 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Wed, 8 Jan 2025 15:09:05 +0300 Subject: [PATCH 081/161] dev: streamline metrics --- pkg/controller/chi/metrics/metrics.go | 44 +++++++++------------------ 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/pkg/controller/chi/metrics/metrics.go b/pkg/controller/chi/metrics/metrics.go index c3e221128..ed93167a6 100644 --- a/pkg/controller/chi/metrics/metrics.go +++ b/pkg/controller/chi/metrics/metrics.go @@ -53,86 +53,72 @@ type Metrics struct { } func createMetrics() *Metrics { + m := &Metrics{} // The unit u should be defined using the appropriate [UCUM](https://ucum.org) case-sensitive code. 
- CHIReconcilesStarted, _ := operator.Meter().Int64Counter( + m.CHIReconcilesStarted, _ = operator.Meter().Int64Counter( "clickhouse_operator_chi_reconciles_started", metric.WithDescription("number of CHI reconciles started"), metric.WithUnit("items"), ) - CHIReconcilesCompleted, _ := operator.Meter().Int64Counter( + m.CHIReconcilesCompleted, _ = operator.Meter().Int64Counter( "clickhouse_operator_chi_reconciles_completed", metric.WithDescription("number of CHI reconciles completed successfully"), metric.WithUnit("items"), ) - CHIReconcilesAborted, _ := operator.Meter().Int64Counter( + m.CHIReconcilesAborted, _ = operator.Meter().Int64Counter( "clickhouse_operator_chi_reconciles_aborted", metric.WithDescription("number of CHI reconciles aborted"), metric.WithUnit("items"), ) - CHIReconcilesTimings, _ := operator.Meter().Float64Histogram( + m.CHIReconcilesTimings, _ = operator.Meter().Float64Histogram( "clickhouse_operator_chi_reconciles_timings", metric.WithDescription("timings of CHI reconciles completed successfully"), metric.WithUnit("s"), ) - HostReconcilesStarted, _ := operator.Meter().Int64Counter( + m.HostReconcilesStarted, _ = operator.Meter().Int64Counter( "clickhouse_operator_host_reconciles_started", metric.WithDescription("number of host reconciles started"), metric.WithUnit("items"), ) - HostReconcilesCompleted, _ := operator.Meter().Int64Counter( + m.HostReconcilesCompleted, _ = operator.Meter().Int64Counter( "clickhouse_operator_host_reconciles_completed", metric.WithDescription("number of host reconciles completed successfully"), metric.WithUnit("items"), ) - HostReconcilesRestarts, _ := operator.Meter().Int64Counter( + m.HostReconcilesRestarts, _ = operator.Meter().Int64Counter( "clickhouse_operator_host_reconciles_restarts", metric.WithDescription("number of host restarts during reconciles"), metric.WithUnit("items"), ) - HostReconcilesErrors, _ := operator.Meter().Int64Counter( + m.HostReconcilesErrors, _ = operator.Meter().Int64Counter( "clickhouse_operator_host_reconciles_errors", metric.WithDescription("number of host reconciles errors"), metric.WithUnit("items"), ) - HostReconcilesTimings, _ := operator.Meter().Float64Histogram( + m.HostReconcilesTimings, _ = operator.Meter().Float64Histogram( "clickhouse_operator_host_reconciles_timings", metric.WithDescription("timings of host reconciles completed successfully"), metric.WithUnit("s"), ) - PodAddEvents, _ := operator.Meter().Int64Counter( + m.PodAddEvents, _ = operator.Meter().Int64Counter( "clickhouse_operator_pod_add_events", metric.WithDescription("number PodAdd events"), metric.WithUnit("items"), ) - PodUpdateEvents, _ := operator.Meter().Int64Counter( + m.PodUpdateEvents, _ = operator.Meter().Int64Counter( "clickhouse_operator_pod_update_events", metric.WithDescription("number PodUpdate events"), metric.WithUnit("items"), ) - PodDeleteEvents, _ := operator.Meter().Int64Counter( + m.PodDeleteEvents, _ = operator.Meter().Int64Counter( "clickhouse_operator_pod_delete_events", metric.WithDescription("number PodDelete events"), metric.WithUnit("items"), ) - return &Metrics{ - CHIReconcilesStarted: CHIReconcilesStarted, - CHIReconcilesCompleted: CHIReconcilesCompleted, - CHIReconcilesAborted: CHIReconcilesAborted, - CHIReconcilesTimings: CHIReconcilesTimings, - - HostReconcilesStarted: HostReconcilesStarted, - HostReconcilesCompleted: HostReconcilesCompleted, - HostReconcilesRestarts: HostReconcilesRestarts, - HostReconcilesErrors: HostReconcilesErrors, - HostReconcilesTimings: HostReconcilesTimings, - - 
PodAddEvents: PodAddEvents, - PodUpdateEvents: PodUpdateEvents, - PodDeleteEvents: PodDeleteEvents, - } + return m } var m *Metrics @@ -191,7 +177,7 @@ func HostReconcilesTimings(ctx context.Context, src labelsSource, seconds float6 func PodAdd(ctx context.Context) { ensureMetrics().PodAddEvents.Add(ctx, 1) } -func metricsPodUpdate(ctx context.Context) { +func PodUpdate(ctx context.Context) { ensureMetrics().PodUpdateEvents.Add(ctx, 1) } func PodDelete(ctx context.Context) { From 4a224ea65500e93a038ebaaf7722d8ace46560ca Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 10 Jan 2025 14:37:09 +0300 Subject: [PATCH 082/161] dev: streamline queue --- pkg/controller/chi/controller.go | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/pkg/controller/chi/controller.go b/pkg/controller/chi/controller.go index f09044f36..cef72bab5 100644 --- a/pkg/controller/chi/controller.go +++ b/pkg/controller/chi/controller.go @@ -71,7 +71,7 @@ type Controller struct { extClient apiExtensions.Interface chopClient chopClientSet.Interface - // queues used to organize events queue processed by operator + // queues used to organize events queue processed by the operator queues []queue.PriorityQueue // not used explicitly recorder record.EventRecorder @@ -130,19 +130,23 @@ func NewController( // initQueues func (c *Controller) initQueues() { - queuesNum := chop.Config().Reconcile.Runtime.ReconcileCHIsThreadsNumber + api.DefaultReconcileSystemThreadsNumber - for i := 0; i < queuesNum; i++ { - c.queues = append( - c.queues, - queue.New(), - //workqueue.NewNamedRateLimitingQueue( - // workqueue.DefaultControllerRateLimiter(), - // fmt.Sprintf("chi%d", i), - //), - ) + for i := 0; i < c.getQueuesNum(); i++ { + c.queues = append(c.queues, c.createQueue()) } } +func (c *Controller) getQueuesNum() int { + return chop.Config().Reconcile.Runtime.ReconcileCHIsThreadsNumber + api.DefaultReconcileSystemThreadsNumber +} + +func (c *Controller) createQueue() queue.PriorityQueue { + return queue.New() + //workqueue.NewNamedRateLimitingQueue( + // workqueue.DefaultControllerRateLimiter(), + // fmt.Sprintf("chi%d", i), + //), +} + func (c *Controller) addEventHandlersCHI( chopInformerFactory chopInformers.SharedInformerFactory, ) { From f91339e6e868745bded36ec021d20b3924b69a88 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 10 Jan 2025 15:42:58 +0300 Subject: [PATCH 083/161] dev: extract pkg interface --- pkg/controller/chi/metrics/metrics.go | 28 ++++++------ pkg/controller/chi/metrics/pkg.go | 62 +++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 14 deletions(-) create mode 100644 pkg/controller/chi/metrics/pkg.go diff --git a/pkg/controller/chi/metrics/metrics.go b/pkg/controller/chi/metrics/metrics.go index ed93167a6..8388fe562 100644 --- a/pkg/controller/chi/metrics/metrics.go +++ b/pkg/controller/chi/metrics/metrics.go @@ -130,11 +130,11 @@ func ensureMetrics() *Metrics { return m } -// CHIInitZeroValues initializes all metrics for CHI to zero values if not already present with appropriate labels +// chiInitZeroValues initializes all metrics for CHI to zero values if not already present with appropriate labels // // This is due to `rate` prometheus function limitation where it expects the metric to be 0-initialized with all possible labels // and doesn't default to 0 if the metric is not present. 
-func CHIInitZeroValues(ctx context.Context, src labelsSource) { +func chiInitZeroValues(ctx context.Context, src labelsSource) { ensureMetrics().CHIReconcilesStarted.Add(ctx, 0, labels(src)) ensureMetrics().CHIReconcilesCompleted.Add(ctx, 0, labels(src)) ensureMetrics().CHIReconcilesAborted.Add(ctx, 0, labels(src)) @@ -145,41 +145,41 @@ func CHIInitZeroValues(ctx context.Context, src labelsSource) { ensureMetrics().HostReconcilesErrors.Add(ctx, 0, labels(src)) } -func CHIReconcilesStarted(ctx context.Context, src labelsSource) { +func chiReconcilesStarted(ctx context.Context, src labelsSource) { ensureMetrics().CHIReconcilesStarted.Add(ctx, 1, labels(src)) } -func CHIReconcilesCompleted(ctx context.Context, src labelsSource) { +func chiReconcilesCompleted(ctx context.Context, src labelsSource) { ensureMetrics().CHIReconcilesCompleted.Add(ctx, 1, labels(src)) } -func CHIReconcilesAborted(ctx context.Context, src labelsSource) { +func chiReconcilesAborted(ctx context.Context, src labelsSource) { ensureMetrics().CHIReconcilesAborted.Add(ctx, 1, labels(src)) } -func CHIReconcilesTimings(ctx context.Context, src labelsSource, seconds float64) { +func chiReconcilesTimings(ctx context.Context, src labelsSource, seconds float64) { ensureMetrics().CHIReconcilesTimings.Record(ctx, seconds, labels(src)) } -func HostReconcilesStarted(ctx context.Context, src labelsSource) { +func hostReconcilesStarted(ctx context.Context, src labelsSource) { ensureMetrics().HostReconcilesStarted.Add(ctx, 1, labels(src)) } -func HostReconcilesCompleted(ctx context.Context, src labelsSource) { +func hostReconcilesCompleted(ctx context.Context, src labelsSource) { ensureMetrics().HostReconcilesCompleted.Add(ctx, 1, labels(src)) } -func HostReconcilesRestart(ctx context.Context, src labelsSource) { +func hostReconcilesRestart(ctx context.Context, src labelsSource) { ensureMetrics().HostReconcilesRestarts.Add(ctx, 1, labels(src)) } -func HostReconcilesErrors(ctx context.Context, src labelsSource) { +func hostReconcilesErrors(ctx context.Context, src labelsSource) { ensureMetrics().HostReconcilesErrors.Add(ctx, 1, labels(src)) } -func HostReconcilesTimings(ctx context.Context, src labelsSource, seconds float64) { +func hostReconcilesTimings(ctx context.Context, src labelsSource, seconds float64) { ensureMetrics().HostReconcilesTimings.Record(ctx, seconds, labels(src)) } -func PodAdd(ctx context.Context) { +func podAdd(ctx context.Context) { ensureMetrics().PodAddEvents.Add(ctx, 1) } -func PodUpdate(ctx context.Context) { +func podUpdate(ctx context.Context) { ensureMetrics().PodUpdateEvents.Add(ctx, 1) } -func PodDelete(ctx context.Context) { +func podDelete(ctx context.Context) { ensureMetrics().PodDeleteEvents.Add(ctx, 1) } diff --git a/pkg/controller/chi/metrics/pkg.go b/pkg/controller/chi/metrics/pkg.go new file mode 100644 index 000000000..bcb42f222 --- /dev/null +++ b/pkg/controller/chi/metrics/pkg.go @@ -0,0 +1,62 @@ +// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package metrics
+
+import (
+	"context"
+)
+
+func CHIInitZeroValues(ctx context.Context, src labelsSource) {
+	chiInitZeroValues(ctx, src)
+}
+
+func CHIReconcilesStarted(ctx context.Context, src labelsSource) {
+	chiReconcilesStarted(ctx, src)
+}
+func CHIReconcilesCompleted(ctx context.Context, src labelsSource) {
+	chiReconcilesCompleted(ctx, src)
+}
+func CHIReconcilesAborted(ctx context.Context, src labelsSource) {
+	chiReconcilesAborted(ctx, src)
+}
+func CHIReconcilesTimings(ctx context.Context, src labelsSource, seconds float64) {
+	chiReconcilesTimings(ctx, src, seconds)
+}
+
+func HostReconcilesStarted(ctx context.Context, src labelsSource) {
+	hostReconcilesStarted(ctx, src)
+}
+func HostReconcilesCompleted(ctx context.Context, src labelsSource) {
+	hostReconcilesCompleted(ctx, src)
+}
+func HostReconcilesRestart(ctx context.Context, src labelsSource) {
+	hostReconcilesRestart(ctx, src)
+}
+func HostReconcilesErrors(ctx context.Context, src labelsSource) {
+	hostReconcilesErrors(ctx, src)
+}
+func HostReconcilesTimings(ctx context.Context, src labelsSource, seconds float64) {
+	hostReconcilesTimings(ctx, src, seconds)
+}
+
+func PodAdd(ctx context.Context) {
+	podAdd(ctx)
+}
+func PodUpdate(ctx context.Context) {
+	podUpdate(ctx)
+}
+func PodDelete(ctx context.Context) {
+	podDelete(ctx)
+}

From d1ad03431148f6492ac8e29ea3c3ab6ce7c3f425 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 10 Jan 2025 15:43:09 +0300
Subject: [PATCH 084/161] dev: format

---
 pkg/controller/chi/controller.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/controller/chi/controller.go b/pkg/controller/chi/controller.go
index cef72bab5..7f1ef2571 100644
--- a/pkg/controller/chi/controller.go
+++ b/pkg/controller/chi/controller.go
@@ -131,7 +131,7 @@ func NewController(
 // initQueues
 func (c *Controller) initQueues() {
 	for i := 0; i < c.getQueuesNum(); i++ {
-        c.queues = append(c.queues, c.createQueue())
+		c.queues = append(c.queues, c.createQueue())
 	}
 }

From ca064f737f88a84832fabad2c2de485b68f8c7b7 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 10 Jan 2025 19:40:57 +0300
Subject: [PATCH 085/161] dev: simplify namespaced name usage

---
 pkg/util/k8s.go | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/pkg/util/k8s.go b/pkg/util/k8s.go
index 5cf340ca1..dc513933c 100644
--- a/pkg/util/k8s.go
+++ b/pkg/util/k8s.go
@@ -16,22 +16,26 @@ package util

 import (
 	core "k8s.io/api/core/v1"
-	meta "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 )

+type iNamespaceName interface {
+	GetNamespace() string
+	GetName() string
+}
+
 // NamespaceName returns namespace and name from the meta
-func NamespaceName(meta meta.Object) (string, string) {
+func NamespaceName(meta iNamespaceName) (string, string) {
 	return meta.GetNamespace(), meta.GetName()
 }

 // NamespaceNameString returns namespace and name as one string
-func NamespaceNameString(meta meta.Object) string {
+func NamespaceNameString(meta iNamespaceName) string {
 	return meta.GetNamespace() + "/" + meta.GetName()
 }

 // NamespacedName returns NamespacedName from obj
-func NamespacedName(obj meta.Object) types.NamespacedName {
+func NamespacedName(obj iNamespaceName) types.NamespacedName {
 	return types.NamespacedName{
 		Namespace: obj.GetNamespace(),
 		Name:      obj.GetName(),

From b0c10fd75594a39009d1c0dd5b2be61b403076cc Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date:
Fri, 10 Jan 2025 19:41:47 +0300 Subject: [PATCH 086/161] dev: introduce low-level chi metric approach --- pkg/controller/chi/metrics/metrics.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pkg/controller/chi/metrics/metrics.go b/pkg/controller/chi/metrics/metrics.go index 8388fe562..9ee002212 100644 --- a/pkg/controller/chi/metrics/metrics.go +++ b/pkg/controller/chi/metrics/metrics.go @@ -34,6 +34,8 @@ type Metrics struct { CHIReconcilesAborted metric.Int64Counter // CHIReconcilesTimings is a histogram of durations of successfully completed CHI reconciles CHIReconcilesTimings metric.Float64Histogram + // CHI is a number (counter) of available CHIs + CHI metric.Int64UpDownCounter // HostReconcilesStarted is a number (counter) of started host reconciles HostReconcilesStarted metric.Int64Counter @@ -75,6 +77,11 @@ func createMetrics() *Metrics { metric.WithDescription("timings of CHI reconciles completed successfully"), metric.WithUnit("s"), ) + m.CHI, _ = operator.Meter().Int64UpDownCounter( + "clickhouse_operator_chi", + metric.WithDescription("number of CHI available"), + metric.WithUnit("items"), + ) m.HostReconcilesStarted, _ = operator.Meter().Int64Counter( "clickhouse_operator_host_reconciles_started", @@ -157,6 +164,12 @@ func chiReconcilesAborted(ctx context.Context, src labelsSource) { func chiReconcilesTimings(ctx context.Context, src labelsSource, seconds float64) { ensureMetrics().CHIReconcilesTimings.Record(ctx, seconds, labels(src)) } +func chiRegister(ctx context.Context, src labelsSource) { + ensureMetrics().CHI.Add(ctx, 1, labels(src)) +} +func chiUnregister(ctx context.Context, src labelsSource) { + ensureMetrics().CHI.Add(ctx, -1, labels(src)) +} func hostReconcilesStarted(ctx context.Context, src labelsSource) { ensureMetrics().HostReconcilesStarted.Add(ctx, 1, labels(src)) From fcc24bd99e1617a6d950f5092ac10609f7e68f6b Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 10 Jan 2025 19:42:49 +0300 Subject: [PATCH 087/161] dev: chi register/unregister interface --- pkg/controller/chi/metrics/pkg.go | 38 +++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/pkg/controller/chi/metrics/pkg.go b/pkg/controller/chi/metrics/pkg.go index bcb42f222..ddbc9255e 100644 --- a/pkg/controller/chi/metrics/pkg.go +++ b/pkg/controller/chi/metrics/pkg.go @@ -16,6 +16,9 @@ package metrics import ( "context" + "sync" + + "github.com/altinity/clickhouse-operator/pkg/util" ) func CHIInitZeroValues(ctx context.Context, src labelsSource) { @@ -60,3 +63,38 @@ func PodUpdate(ctx context.Context) { func PodDelete(ctx context.Context) { podDelete(ctx) } + +var r = map[string]bool{} +var mx = sync.Mutex{} + +func CHIRegister(ctx context.Context, src labelsSource) { + mx.Lock() + defer mx.Unlock() + + if registered, found := r[createRegistryKey(src)]; found && registered { + // Already registered + return + } + + // Need to register + r[createRegistryKey(src)] = true + chiRegister(ctx, src) +} + +func CHIUnregister(ctx context.Context, src labelsSource) { + mx.Lock() + defer mx.Unlock() + + if registered, found := r[createRegistryKey(src)]; !registered || !found { + // Already unregistered + return + } + + // Need to unregister + r[createRegistryKey(src)] = false + chiUnregister(ctx, src) +} + +func createRegistryKey(src labelsSource) string { + return util.NamespaceNameString(src) +} From 8261b10097122578f5028477ccdc6ece7a1cdef5 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 10 Jan 2025 19:43:10 +0300 Subject: [PATCH 088/161] dev: add 
register to updates --- pkg/controller/chi/worker.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/controller/chi/worker.go b/pkg/controller/chi/worker.go index e66ee7dfe..724b1f3f7 100644 --- a/pkg/controller/chi/worker.go +++ b/pkg/controller/chi/worker.go @@ -17,6 +17,7 @@ package chi import ( "context" "errors" + "github.com/altinity/clickhouse-operator/pkg/controller/chi/metrics" "time" core "k8s.io/api/core/v1" @@ -335,8 +336,11 @@ func (w *worker) updateCHI(ctx context.Context, old, new *api.ClickHouseInstalla new = n.(*api.ClickHouseInstallation) } + metrics.CHIRegister(ctx, new) + if w.deleteCHI(ctx, old, new) { // CHI is being deleted + metrics.CHIUnregister(ctx, new) return nil } From c368d163e3342af0c83a75adbcabe6c79a69e4a1 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 10 Jan 2025 19:43:22 +0300 Subject: [PATCH 089/161] dev: add register to deletes --- pkg/controller/chi/worker-deleter.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/controller/chi/worker-deleter.go b/pkg/controller/chi/worker-deleter.go index 4555a9b29..da13f83d8 100644 --- a/pkg/controller/chi/worker-deleter.go +++ b/pkg/controller/chi/worker-deleter.go @@ -16,6 +16,7 @@ package chi import ( "context" + "github.com/altinity/clickhouse-operator/pkg/controller/chi/metrics" "time" core "k8s.io/api/core/v1" @@ -248,6 +249,8 @@ func (w *worker) discoveryAndDeleteCR(ctx context.Context, cr api.ICustomResourc return nil } + metrics.CHIUnregister(ctx, cr) + objs := w.c.discovery(ctx, cr) if objs.NumStatefulSet() > 0 { cr.WalkHosts(func(host *api.Host) error { From c7d830c4b5b5a099a7cce5f8946c16f7b5c1ac08 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 10 Jan 2025 19:43:56 +0300 Subject: [PATCH 090/161] format --- pkg/controller/chi/worker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/chi/worker.go b/pkg/controller/chi/worker.go index 724b1f3f7..cbab798b4 100644 --- a/pkg/controller/chi/worker.go +++ b/pkg/controller/chi/worker.go @@ -17,7 +17,6 @@ package chi import ( "context" "errors" - "github.com/altinity/clickhouse-operator/pkg/controller/chi/metrics" "time" core "k8s.io/api/core/v1" @@ -28,6 +27,7 @@ import ( "github.com/altinity/clickhouse-operator/pkg/apis/common/types" "github.com/altinity/clickhouse-operator/pkg/apis/deployment" "github.com/altinity/clickhouse-operator/pkg/chop" + "github.com/altinity/clickhouse-operator/pkg/controller/chi/metrics" "github.com/altinity/clickhouse-operator/pkg/controller/common" a "github.com/altinity/clickhouse-operator/pkg/controller/common/announcer" "github.com/altinity/clickhouse-operator/pkg/controller/common/poller/domain" From 63698263f3260a5301a41a330a39b4775516574c Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 10 Jan 2025 19:44:26 +0300 Subject: [PATCH 091/161] format --- pkg/controller/chi/worker-deleter.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/chi/worker-deleter.go b/pkg/controller/chi/worker-deleter.go index da13f83d8..16e272ff9 100644 --- a/pkg/controller/chi/worker-deleter.go +++ b/pkg/controller/chi/worker-deleter.go @@ -16,7 +16,6 @@ package chi import ( "context" - "github.com/altinity/clickhouse-operator/pkg/controller/chi/metrics" "time" core "k8s.io/api/core/v1" @@ -26,6 +25,7 @@ import ( api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" "github.com/altinity/clickhouse-operator/pkg/apis/common/types" "github.com/altinity/clickhouse-operator/pkg/controller" + 
"github.com/altinity/clickhouse-operator/pkg/controller/chi/metrics" a "github.com/altinity/clickhouse-operator/pkg/controller/common/announcer" "github.com/altinity/clickhouse-operator/pkg/controller/common/storage" "github.com/altinity/clickhouse-operator/pkg/model" From c21f8840bc08a6c3f64add3879e04b75879fc0f4 Mon Sep 17 00:00:00 2001 From: alz Date: Sun, 12 Jan 2025 15:24:08 +0300 Subject: [PATCH 092/161] Extended test_046 to check clickhouse_operator_chi metric. --- .../manifests/chi/test-028-replication.yaml | 5 +- ...045-chopconf.yaml => no-wait-queries.yaml} | 2 +- tests/e2e/steps.py | 2 +- tests/e2e/test_operator.py | 51 +++++++++++++------ 4 files changed, 39 insertions(+), 21 deletions(-) rename tests/e2e/manifests/chopconf/{test-045-chopconf.yaml => no-wait-queries.yaml} (84%) diff --git a/tests/e2e/manifests/chi/test-028-replication.yaml b/tests/e2e/manifests/chi/test-028-replication.yaml index c1d7db19a..3124fb29c 100644 --- a/tests/e2e/manifests/chi/test-028-replication.yaml +++ b/tests/e2e/manifests/chi/test-028-replication.yaml @@ -21,8 +21,5 @@ spec: clusters: - name: default layout: - shardsCount: 2 + shardsCount: 1 replicasCount: 2 - profiles: - default/database_atomic_wait_for_drop_and_detach_synchronously: 1 - default/allow_experimental_live_view: 1 diff --git a/tests/e2e/manifests/chopconf/test-045-chopconf.yaml b/tests/e2e/manifests/chopconf/no-wait-queries.yaml similarity index 84% rename from tests/e2e/manifests/chopconf/test-045-chopconf.yaml rename to tests/e2e/manifests/chopconf/no-wait-queries.yaml index 00f31a325..6565fd689 100644 --- a/tests/e2e/manifests/chopconf/test-045-chopconf.yaml +++ b/tests/e2e/manifests/chopconf/no-wait-queries.yaml @@ -1,7 +1,7 @@ apiVersion: "clickhouse.altinity.com/v1" kind: "ClickHouseOperatorConfiguration" metadata: - name: "test-045-chopconf" + name: "no-wait-queries" spec: reconcile: host: diff --git a/tests/e2e/steps.py b/tests/e2e/steps.py index db29e9159..5866b44ea 100644 --- a/tests/e2e/steps.py +++ b/tests/e2e/steps.py @@ -188,4 +188,4 @@ def check_metrics_monitoring( with Then("Not ready. 
Wait for " + str(i * 5) + " seconds"): time.sleep(i * 5) - assert expected_pattern_found, error() \ No newline at end of file + assert expected_pattern_found, error(out) \ No newline at end of file diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index aca5e5da7..dfa2cceeb 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -3029,7 +3029,6 @@ def test_027(self): @Requirements(RQ_SRS_026_ClickHouseOperator_Managing_RestartingOperator("1.0")) def test_028(self): create_shell_namespace_clickhouse_template() - util.require_keeper(keeper_type=self.context.keeper_type) manifest = "manifests/chi/test-028-replication.yaml" @@ -3043,17 +3042,17 @@ def test_028(self): "manifests/chit/tpl-persistent-volume-100Mi.yaml", }, "object_counts": { - "statefulset": 4, - "pod": 4, - "service": 5, + "statefulset": 2, + "pod": 2, + "service": 3, }, "do_not_delete": 1, }, ) - sql = """SET skip_unavailable_shards=1; SYSTEM DROP DNS CACHE; SELECT getMacro('replica') AS replica, uptime() AS uptime, + sql = """SELECT getMacro('replica') AS replica, uptime() AS uptime, (SELECT count() FROM system.clusters WHERE cluster='all-sharded') AS total_hosts, - (SELECT count() online_hosts FROM cluster('all-sharded', system.one) ) AS online_hosts + (SELECT count() online_hosts FROM cluster('all-sharded', system.one) settings skip_unavailable_shards=1 ) AS online_hosts FORMAT JSONEachRow""" note("Before restart") out = clickhouse.query_with_error(chi, sql) @@ -3085,8 +3084,8 @@ def test_028(self): ch2 = clickhouse.query_with_error( chi, sql, - pod="chi-test-028-replication-default-1-0-0", - host="chi-test-028-replication-default-1-0", + pod="chi-test-028-replication-default-0-1-0", + host="chi-test-028-replication-default-0-1", advanced_params="--connect_timeout=1 --send_timeout=10 --receive_timeout=10", ) @@ -3124,16 +3123,16 @@ def test_028(self): with Then("Clear RollingUpdate restart policy"): cmd = f"patch chi {chi} --type='json' --patch='[{{\"op\":\"remove\",\"path\":\"/spec/restart\"}}]'" kubectl.launch(cmd) - time.sleep(15) + kubectl.wait_chi_status(chi, "InProgress") kubectl.wait_chi_status(chi, "Completed") with Then("Restart operator. 
CHI should not be restarted"): check_operator_restart( chi=chi, wait_objects={ - "statefulset": 4, - "pod": 4, - "service": 5, + "statefulset": 2, + "pod": 2, + "service": 3, }, pod=f"chi-{chi}-default-0-0-0", ) @@ -3148,12 +3147,13 @@ def test_028(self): with When("Stop installation"): cmd = f'patch chi {chi} --type=\'json\' --patch=\'[{{"op":"add","path":"/spec/stop","value":"yes"}}]\'' kubectl.launch(cmd) + kubectl.wait_chi_status(chi, "InProgress") kubectl.wait_chi_status(chi, "Completed") with Then("Stateful sets should be there but no running pods"): kubectl.wait_objects(chi, { - "statefulset": 4, + "statefulset": 2, "pod": 0, - "service": 4, + "service": 2, }) with Finally("I clean up"): @@ -4482,7 +4482,7 @@ def test_045_2(self): create_shell_namespace_clickhouse_template() with Given("I set spec.reconcile.host.wait.queries property"): - util.apply_operator_config("manifests/chopconf/test-045-chopconf.yaml") + util.apply_operator_config("manifests/chopconf/no-wait-queries.yaml") test_045(manifest=f"manifests/chi/test-045-2-wait-query-finish.yaml") @@ -4492,6 +4492,9 @@ def test_045_2(self): def test_046(self): """Check that clickhouse-operator creates metrics for reconcile and other clickhouse-operator events.""" create_shell_namespace_clickhouse_template() + with Given("I change operator statefullSet timeout"): + util.apply_operator_config("manifests/chopconf/low-timeout.yaml") + cluster = "default" manifest = f"manifests/chi/test-046-0-clickhouse-operator-metrics.yaml" chi = yaml_manifest.get_name(util.get_full_path(manifest)) @@ -4517,8 +4520,15 @@ def check_metrics(metric_names): container="clickhouse-operator", port="9999", expect_pattern=metric_name, + max_retries=3 ) + with Then(f"Check clickhouse-operator exposes clickhouse_operator_chi metrics"): + check_metrics([ + "clickhouse_operator_chi{.*chi=\"test-046-operator-metrics\".*} 1", + ]) + + with Then(f"Check clickhouse-operator exposes clickhouse_operator_chi_reconciles_* metrics"): check_metrics([ "clickhouse_operator_chi_reconciles_started{.*chi=\"test-046-operator-metrics\".*} 1", @@ -4593,6 +4603,17 @@ def check_metrics(metric_names): "clickhouse_operator_host_reconciles_timings.*chi=\"test-046-operator-metrics\".*", ]) + with Then("Stop CHI"): + cmd = f'patch chi {chi} --type=\'json\' --patch=\'[{{"op":"add","path":"/spec/stop","value":"yes"}}]\'' + kubectl.launch(cmd) + kubectl.wait_chi_status(chi, "InProgress") + kubectl.wait_chi_status(chi, "Completed") + + with Then(f"Check clickhouse-operator exposes clickhouse_operator_chi metric for stopped chi"): + check_metrics([ + "clickhouse_operator_chi{.*chi=\"test-046-operator-metrics\".*} 1", + ]) + with Finally("I clean up"): delete_test_namespace() From 26be596d0d0f5a4223a8ed2a11f265465b94600e Mon Sep 17 00:00:00 2001 From: Eugene Klimov Date: Mon, 13 Jan 2025 14:02:33 +0500 Subject: [PATCH 093/161] add fixes to GRANT examples, fix https://github.com/Altinity/clickhouse-operator/issues/1615 (#1616) --- docs/chi-examples/05-settings-08-grants.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/chi-examples/05-settings-08-grants.yaml b/docs/chi-examples/05-settings-08-grants.yaml index 74962b162..d834c62e1 100644 --- a/docs/chi-examples/05-settings-08-grants.yaml +++ b/docs/chi-examples/05-settings-08-grants.yaml @@ -7,7 +7,8 @@ spec: users: myuser/profile: readonly myuser/grants/query: - - "GRANT SELECT,INSERT,ALTER,CREATE,DROP,TRUNCATE,OPTIMIZE,SHOW,dictGet,REMOTE ON canarydb.*" + # you can't combine `allow_databases` with `GRANT ... 
canarydb.*`
+        - "GRANT SELECT,INSERT,ALTER,CREATE,DROP,TRUNCATE,OPTIMIZE,SHOW,dictGet,REMOTE ON *.*"
       myuser/allow_databases/database:
         - "canarydb"
       myuser2/profile: default

From 9a5a18d1d1e95472bfcf2cc6da2828ebb53750a1 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Mon, 13 Jan 2025 12:07:52 +0300
Subject: [PATCH 094/161] dev: naming

---
 pkg/interfaces/{config_map_type.go => config_map.go} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename pkg/interfaces/{config_map_type.go => config_map.go} (100%)

diff --git a/pkg/interfaces/config_map_type.go b/pkg/interfaces/config_map.go
similarity index 100%
rename from pkg/interfaces/config_map_type.go
rename to pkg/interfaces/config_map.go

From 9f1093b2047b0143c52490a38fb8ff1259434772 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 17 Jan 2025 19:18:46 +0300
Subject: [PATCH 095/161] dev: remove unused

---
 pkg/controller/chi/worker-chi-reconciler.go | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go
index d7432b6f7..33cf9e176 100644
--- a/pkg/controller/chi/worker-chi-reconciler.go
+++ b/pkg/controller/chi/worker-chi-reconciler.go
@@ -652,17 +652,6 @@ func (w *worker) reconcileHost(ctx context.Context, host *api.Host) error {
 	w.a.V(2).M(host).S().P()
 	defer w.a.V(2).M(host).E().P()
 
-	//si := host.GetRuntime().GetAddress().GetShardIndex()
-	//ri := host.GetRuntime().GetAddress().GetReplicaIndex()
-	////sleep := util.DecBottomed(si, 1, 0)*(si % 3)*20
-	//sleep := (2 - si)*90
-	//if ri > 0 {
-	//	sleep = 0
-	//}
-	//w.a.V(1).Info("Host [%d/%d]. Going to sleep %d sec", si, ri, sleep)
-	//time.Sleep((time.Duration)(sleep)*time.Second)
-	//w.a.V(1).Info("Host [%d/%d]. Done to sleep %d sec", si, ri)
-
 	metrics.HostReconcilesStarted(ctx, host.GetCR())
 	startTime := time.Now()

From 9b25448a9a529e2c30f20a8b35a6d818753bc1ed Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 17 Jan 2025 19:58:02 +0300
Subject: [PATCH 096/161] dev: naming

---
 pkg/controller/chi/worker-chi-reconciler.go | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go
index 33cf9e176..fa0b62a7e 100644
--- a/pkg/controller/chi/worker-chi-reconciler.go
+++ b/pkg/controller/chi/worker-chi-reconciler.go
@@ -735,8 +735,8 @@ func (w *worker) reconcileHostPrepare(ctx context.Context, host *api.Host) error
 // reconcileHostMain reconciles specified ClickHouse host
 func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error {
 	var (
-		reconcileStatefulSetOpts *statefulset.ReconcileOptions
-		migrateTableOpts         *migrateTableOptions
+		stsReconcileOpts *statefulset.ReconcileOptions
+		migrateTableOpts *migrateTableOptions
 	)
 
 	if err := w.reconcileConfigMapHost(ctx, host); err != nil {
@@ -752,6 +752,7 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error {
 	w.a.V(1).
 		M(host).F().
 		Info("Reconcile PVCs and check possible data loss for host: %s", host.GetName())
+
 	if storage.ErrIsDataLoss(
 		storage.NewStorageReconciler(
 			w.task,
@@ -762,7 +763,7 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error {
 		// In case of data loss detection on existing volumes, we need to:
 		// 1. recreate StatefulSet
 		// 2.
run tables migration again - reconcileStatefulSetOpts = reconcileStatefulSetOpts.SetForceRecreate() + stsReconcileOpts = stsReconcileOpts.SetForceRecreate() migrateTableOpts = &migrateTableOptions{ forceMigrate: true, dropReplica: true, @@ -772,7 +773,7 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error { Info("Data loss detected for host: %s. Will do force migrate", host.GetName()) } - if err := w.reconcileHostStatefulSet(ctx, host, reconcileStatefulSetOpts); err != nil { + if err := w.reconcileHostStatefulSet(ctx, host, stsReconcileOpts); err != nil { metrics.HostReconcilesErrors(ctx, host.GetCR()) w.a.V(1). M(host).F(). From 9610049e37447f3a496d4e4d30abb7dae8cae5dc Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 17 Jan 2025 20:03:14 +0300 Subject: [PATCH 097/161] dev: extract as a func --- pkg/controller/chi/worker-chi-reconciler.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go index fa0b62a7e..225349dd0 100644 --- a/pkg/controller/chi/worker-chi-reconciler.go +++ b/pkg/controller/chi/worker-chi-reconciler.go @@ -753,13 +753,7 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error { M(host).F(). Info("Reconcile PVCs and check possible data loss for host: %s", host.GetName()) - if storage.ErrIsDataLoss( - storage.NewStorageReconciler( - w.task, - w.c.namer, - storage.NewStoragePVC(w.c.kube.Storage()), - ).ReconcilePVCs(ctx, host, api.DesiredStatefulSet), - ) { + if storage.ErrIsDataLoss(w.reconcilePVCs(ctx, host)) { // In case of data loss detection on existing volumes, we need to: // 1. recreate StatefulSet // 2. run tables migration again @@ -806,6 +800,14 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error { return nil } +func (w *worker) reconcilePVCs(ctx context.Context, host *api.Host) storage.ErrorDataPersistence { + return storage.NewStorageReconciler( + w.task, + w.c.namer, + storage.NewStoragePVC(w.c.kube.Storage()), + ).ReconcilePVCs(ctx, host, api.DesiredStatefulSet) +} + // reconcileHostBootstrap reconciles specified ClickHouse host func (w *worker) reconcileHostBootstrap(ctx context.Context, host *api.Host) error { if err := w.includeHost(ctx, host); err != nil { From f77fbe85f9de712e01dfe1f8a527dad505dbf578 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 17 Jan 2025 20:03:32 +0300 Subject: [PATCH 098/161] dev: streamline --- pkg/controller/chi/worker-chi-reconciler.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go index 225349dd0..54bccf544 100644 --- a/pkg/controller/chi/worker-chi-reconciler.go +++ b/pkg/controller/chi/worker-chi-reconciler.go @@ -775,11 +775,7 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error { return err } // Polish all new volumes that operator has to create - _ = storage.NewStorageReconciler( - w.task, - w.c.namer, - storage.NewStoragePVC(w.c.kube.Storage()), - ).ReconcilePVCs(ctx, host, api.DesiredStatefulSet) + _ = w.reconcilePVCs(ctx, host) _ = w.reconcileHostService(ctx, host) From b31dd8ed4ac9853d4dd6f74eff54c6c5cb20bf6f Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 17 Jan 2025 20:05:16 +0300 Subject: [PATCH 099/161] dev: reuse pvc reconciler --- pkg/controller/chi/worker-chi-reconciler.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff 
--git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go index 54bccf544..68b3418c4 100644 --- a/pkg/controller/chi/worker-chi-reconciler.go +++ b/pkg/controller/chi/worker-chi-reconciler.go @@ -753,7 +753,7 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error { M(host).F(). Info("Reconcile PVCs and check possible data loss for host: %s", host.GetName()) - if storage.ErrIsDataLoss(w.reconcilePVCs(ctx, host)) { + if storage.ErrIsDataLoss(w.reconcileHostPVCs(ctx, host)) { // In case of data loss detection on existing volumes, we need to: // 1. recreate StatefulSet // 2. run tables migration again @@ -775,7 +775,7 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error { return err } // Polish all new volumes that operator has to create - _ = w.reconcilePVCs(ctx, host) + _ = w.reconcileHostPVCs(ctx, host) _ = w.reconcileHostService(ctx, host) @@ -796,7 +796,7 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error { return nil } -func (w *worker) reconcilePVCs(ctx context.Context, host *api.Host) storage.ErrorDataPersistence { +func (w *worker) reconcileHostPVCs(ctx context.Context, host *api.Host) storage.ErrorDataPersistence { return storage.NewStorageReconciler( w.task, w.c.namer, From 974e784dc2f5659dda7eb359d97b6a29cd5e15f0 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 17 Jan 2025 20:07:30 +0300 Subject: [PATCH 100/161] dev: description --- pkg/controller/chi/worker-chi-reconciler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go index 68b3418c4..1593a7ddd 100644 --- a/pkg/controller/chi/worker-chi-reconciler.go +++ b/pkg/controller/chi/worker-chi-reconciler.go @@ -780,7 +780,7 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error { _ = w.reconcileHostService(ctx, host) // Prepare for tables migration. - // Sometimes service needs some time to start after creation|modification before being accessible for usage + // Sometimes service needs significant time to start after creation/modification before being accessible for usage // Check whether ClickHouse is running and accessible and what version is available. if version, err := w.pollHostForClickHouseVersion(ctx, host); err == nil { w.a.V(1). From 3948b35a9168889acc30c48290f6a2bc055b4023 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Fri, 17 Jan 2025 20:45:56 +0300 Subject: [PATCH 101/161] dev: message --- pkg/controller/chi/worker-chi-reconciler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go index 1593a7ddd..a3ce21571 100644 --- a/pkg/controller/chi/worker-chi-reconciler.go +++ b/pkg/controller/chi/worker-chi-reconciler.go @@ -326,7 +326,7 @@ func (w *worker) reconcileHostStatefulSet(ctx context.Context, host *api.Host, o // In case we have to force-restart host // We'll do it via replicas: 0 in StatefulSet. if w.shouldForceRestartHost(host) { - w.a.V(1).M(host).F().Info("Reconcile host: %s. Shutting host down due to force restart", host.GetName()) + w.a.V(1).M(host).F().Info("Reconcile host. 
Shutting down due to force restart: %s", host.GetName()) w.stsReconciler.PrepareHostStatefulSetWithStatus(ctx, host, true) _ = w.stsReconciler.ReconcileStatefulSet(ctx, host, false, opts) metrics.HostReconcilesRestart(ctx, host.GetCR()) From 549283d4b7c17c449e2781cfe89d7aca242d9889 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sat, 18 Jan 2025 00:27:40 +0300 Subject: [PATCH 102/161] dev: add pod restart counter getter --- pkg/controller/chi/kube/pod.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pkg/controller/chi/kube/pod.go b/pkg/controller/chi/kube/pod.go index b18f10ffa..463305cf3 100644 --- a/pkg/controller/chi/kube/pod.go +++ b/pkg/controller/chi/kube/pod.go @@ -67,6 +67,21 @@ func (c *Pod) Get(params ...any) (*core.Pod, error) { return c.kubeClient.CoreV1().Pods(namespace).Get(controller.NewContext(), name, controller.NewGetOptions()) } +func (c *Pod) GetRestartCounters(params ...any) (map[string]int, error) { + pod, err := c.Get(params...) + if err != nil { + return nil, err + } + if len(pod.Status.ContainerStatuses) < 1 { + return nil, nil + } + res := map[string]int{} + for _, containerStatus := range pod.Status.ContainerStatuses { + res[containerStatus.Name] = int(containerStatus.RestartCount) + } + return res, nil +} + // GetAll gets all pods for provided entity func (c *Pod) GetAll(obj any) []*core.Pod { switch typed := obj.(type) { From c5e88740828547ee06472bc6c4d4a243853af1e3 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sat, 18 Jan 2025 00:28:12 +0300 Subject: [PATCH 103/161] dev: map comparison --- pkg/util/map.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pkg/util/map.go b/pkg/util/map.go index 7c92bc15d..551b74b4c 100644 --- a/pkg/util/map.go +++ b/pkg/util/map.go @@ -269,3 +269,21 @@ func MapMigrate(cur, new, old map[string]string) map[string]string { removed := MapGetSortedKeys(SubtractStringMaps(CopyMap(old), new)) return MapDeleteKeys(MergeStringMapsPreserve(new, cur), removed...) 
} + +func MapsAreTheSame(m1, m2 map[string]int) bool { + if len(m1) != len(m2) { + return false + } + + for k1, v1 := range m1 { + v2, found := m2[k1] + if !found { + return false + } + if v1 != v2 { + return false + } + } + + return true +} From 2cdb41a3059e5cee7e65824e7aad7d9120cd1f9a Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sat, 18 Jan 2025 15:38:19 +0300 Subject: [PATCH 104/161] dev: move file --- ...ude-include-wait.go => worker-wait-exclude-include-restart.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pkg/controller/chi/{worker-exclude-include-wait.go => worker-wait-exclude-include-restart.go} (100%) diff --git a/pkg/controller/chi/worker-exclude-include-wait.go b/pkg/controller/chi/worker-wait-exclude-include-restart.go similarity index 100% rename from pkg/controller/chi/worker-exclude-include-wait.go rename to pkg/controller/chi/worker-wait-exclude-include-restart.go From 254c369e8b4f0782ae3b46fb1968562512589056 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sat, 18 Jan 2025 16:13:50 +0300 Subject: [PATCH 105/161] dev: rename context --- pkg/controller/common/poller/poller-functions.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/controller/common/poller/poller-functions.go b/pkg/controller/common/poller/poller-functions.go index 578b582f3..a094e88ac 100644 --- a/pkg/controller/common/poller/poller-functions.go +++ b/pkg/controller/common/poller/poller-functions.go @@ -25,34 +25,34 @@ type Functions struct { ShouldContinue func(context.Context, any, error) bool } -func (p *Functions) CallGet(c context.Context) (any, error) { +func (p *Functions) CallGet(ctx context.Context) (any, error) { if p == nil { return nil, nil } if p.Get == nil { return nil, nil } - return p.Get(c) + return p.Get(ctx) } -func (p *Functions) CallIsDone(c context.Context, a any) bool { +func (p *Functions) CallIsDone(ctx context.Context, a any) bool { if p == nil { return false } if p.IsDone == nil { return false } - return p.IsDone(c, a) + return p.IsDone(ctx, a) } -func (p *Functions) CallShouldContinue(c context.Context, a any, e error) bool { +func (p *Functions) CallShouldContinue(ctx context.Context, a any, e error) bool { if p == nil { return false } if p.ShouldContinue == nil { return false } - return p.ShouldContinue(c, a, e) + return p.ShouldContinue(ctx, a, e) } type BackgroundFunctions struct { From 0a5335d9b4a02d7cf6b816b53077fbb79be856b9 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sat, 18 Jan 2025 16:14:18 +0300 Subject: [PATCH 106/161] dev: propoer func name --- pkg/controller/common/poller/poller-options.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/common/poller/poller-options.go b/pkg/controller/common/poller/poller-options.go index e1aa5ce90..43a5e9f41 100644 --- a/pkg/controller/common/poller/poller-options.go +++ b/pkg/controller/common/poller/poller-options.go @@ -61,7 +61,7 @@ func (o *Options) FromConfig(config *api.OperatorConfig) *Options { return o } -// SetCreateTimeout sets create timeout +// SetGetErrorTimeout sets get error timeout func (o *Options) SetGetErrorTimeout(timeout time.Duration) *Options { if o == nil { return nil From 696f7ee5038baef933ec71238a79c73a31488428 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sat, 18 Jan 2025 16:14:46 +0300 Subject: [PATCH 107/161] dev: introduce extended pod --- pkg/interfaces/interfaces-kube.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/interfaces/interfaces-kube.go 
b/pkg/interfaces/interfaces-kube.go
index 1c074ebc1..85efe7b69 100644
--- a/pkg/interfaces/interfaces-kube.go
+++ b/pkg/interfaces/interfaces-kube.go
@@ -72,6 +72,10 @@ type IKubePod interface {
 	Delete(ctx context.Context, namespace, name string) error
 }
 
+type IKubePodEx interface {
+	GetRestartCounters(params ...any) (map[string]int, error)
+}
+
 type IKubePVC interface {
 	Create(ctx context.Context, pvc *core.PersistentVolumeClaim) (*core.PersistentVolumeClaim, error)
 	Get(ctx context.Context, namespace, name string) (*core.PersistentVolumeClaim, error)

From e6edaec41c675e91a4149eb555d96757a2c1accb Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Sat, 18 Jan 2025 16:15:11 +0300
Subject: [PATCH 108/161] dev: waiter

---
 .../chi/worker-wait-exclude-include-restart.go | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/pkg/controller/chi/worker-wait-exclude-include-restart.go b/pkg/controller/chi/worker-wait-exclude-include-restart.go
index 16edf6830..8acc08bc2 100644
--- a/pkg/controller/chi/worker-wait-exclude-include-restart.go
+++ b/pkg/controller/chi/worker-wait-exclude-include-restart.go
@@ -16,6 +16,7 @@ package chi
 
 import (
 	"context"
+	"github.com/altinity/clickhouse-operator/pkg/interfaces"
 	"time"
 
 	log "github.com/altinity/clickhouse-operator/pkg/announcer"
@@ -354,3 +355,14 @@ func (w *worker) waitHostNoActiveQueries(ctx context.Context, host *api.Host) er
 		return n <= 1
 	})
 }
+
+// waitHostRestart waits until the host's pod reports changed container restart counters
+func (w *worker) waitHostRestart(ctx context.Context, host *api.Host, start map[string]int) error {
+	return domain.PollHost(ctx, host, func(ctx context.Context, host *api.Host) bool {
+		cur, _ := w.c.kube.Pod().(interfaces.IKubePodEx).GetRestartCounters(host)
+		if util.MapsAreTheSame(start, cur) {
+			return false
+		}
+		return true
+	})
+}

From 41958683a8ba98c3bfe7f7ab839a760ddd5bed88 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Sat, 18 Jan 2025 16:15:36 +0300
Subject: [PATCH 109/161] dev: formatter

---
 pkg/controller/chi/worker-wait-exclude-include-restart.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/controller/chi/worker-wait-exclude-include-restart.go b/pkg/controller/chi/worker-wait-exclude-include-restart.go
index 8acc08bc2..effe0ffc5 100644
--- a/pkg/controller/chi/worker-wait-exclude-include-restart.go
+++ b/pkg/controller/chi/worker-wait-exclude-include-restart.go
@@ -16,13 +16,13 @@ package chi
 
 import (
 	"context"
-	"github.com/altinity/clickhouse-operator/pkg/interfaces"
 	"time"
 
 	log "github.com/altinity/clickhouse-operator/pkg/announcer"
 	api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1"
 	"github.com/altinity/clickhouse-operator/pkg/chop"
 	"github.com/altinity/clickhouse-operator/pkg/controller/common/poller/domain"
+	"github.com/altinity/clickhouse-operator/pkg/interfaces"
 	"github.com/altinity/clickhouse-operator/pkg/util"
 )

From 55b38f76afb451eda93450ce8e83ad8049fad238 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Sun, 19 Jan 2025 16:27:58 +0300
Subject: [PATCH 110/161] dev: extract tables reconciler

---
 pkg/controller/chi/worker-chi-reconciler.go | 27 ++++++++++++---------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go
index a3ce21571..c99b979e8 100644
--- a/pkg/controller/chi/worker-chi-reconciler.go
+++ b/pkg/controller/chi/worker-chi-reconciler.go
@@ -774,11 +774,24 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error {
 			Warning("Reconcile Host
interrupted with an error 3. Host: %s Err: %v", host.GetName(), err) return err } + // Polish all new volumes that operator has to create _ = w.reconcileHostPVCs(ctx, host) - _ = w.reconcileHostService(ctx, host) + _ = w.reconcileHostTables(ctx, host, migrateTableOpts) + + return nil +} +func (w *worker) reconcileHostPVCs(ctx context.Context, host *api.Host) storage.ErrorDataPersistence { + return storage.NewStorageReconciler( + w.task, + w.c.namer, + storage.NewStoragePVC(w.c.kube.Storage()), + ).ReconcilePVCs(ctx, host, api.DesiredStatefulSet) +} + +func (w *worker) reconcileHostTables(ctx context.Context, host *api.Host, migrateTableOpts *migrateTableOptions) error { // Prepare for tables migration. // Sometimes service needs significant time to start after creation/modification before being accessible for usage // Check whether ClickHouse is running and accessible and what version is available. @@ -791,17 +804,7 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error { M(host).F(). Warning("Check host for ClickHouse availability before migrating tables. Host: %s Failed to get ClickHouse version: %s", host.GetName(), version) } - _ = w.migrateTables(ctx, host, migrateTableOpts) - - return nil -} - -func (w *worker) reconcileHostPVCs(ctx context.Context, host *api.Host) storage.ErrorDataPersistence { - return storage.NewStorageReconciler( - w.task, - w.c.namer, - storage.NewStoragePVC(w.c.kube.Storage()), - ).ReconcilePVCs(ctx, host, api.DesiredStatefulSet) + return w.migrateTables(ctx, host, migrateTableOpts) } // reconcileHostBootstrap reconciles specified ClickHouse host From b0285bf2754b8fe2b7b8c9950ee26e0df333d496 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sun, 19 Jan 2025 17:30:13 +0300 Subject: [PATCH 111/161] dev: streamline host reconciler --- pkg/controller/chi/worker-chi-reconciler.go | 25 +++++++++++++-------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go index c99b979e8..686aa7a1f 100644 --- a/pkg/controller/chi/worker-chi-reconciler.go +++ b/pkg/controller/chi/worker-chi-reconciler.go @@ -754,17 +754,10 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error { Info("Reconcile PVCs and check possible data loss for host: %s", host.GetName()) if storage.ErrIsDataLoss(w.reconcileHostPVCs(ctx, host)) { - // In case of data loss detection on existing volumes, we need to: - // 1. recreate StatefulSet - // 2. run tables migration again - stsReconcileOpts = stsReconcileOpts.SetForceRecreate() - migrateTableOpts = &migrateTableOptions{ - forceMigrate: true, - dropReplica: true, - } + stsReconcileOpts, migrateTableOpts = w.reconcileHostPVCsDataLossDetected(host) w.a.V(1). M(host).F(). - Info("Data loss detected for host: %s. Will do force migrate", host.GetName()) + Info("Data loss detected for host: %s.", host.GetName()) } if err := w.reconcileHostStatefulSet(ctx, host, stsReconcileOpts); err != nil { @@ -783,6 +776,20 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error { return nil } +func (w *worker) reconcileHostPVCsDataLossDetected(host *api.Host) (*statefulset.ReconcileOptions, *migrateTableOptions) { + w.a.V(1). + M(host).F(). + Info("Data loss detected for host: %s. Will do force data recovery", host.GetName()) + + // In case of data loss detection on existing volumes, we need to: + // 1. recreate StatefulSet + // 2. 
run tables migration again + return statefulset.NewReconcileStatefulSetOptions().SetForceRecreate(), &migrateTableOptions{ + forceMigrate: true, + dropReplica: true, + } +} + func (w *worker) reconcileHostPVCs(ctx context.Context, host *api.Host) storage.ErrorDataPersistence { return storage.NewStorageReconciler( w.task, From e01d5a3689b7e3a555337289f8db335b41ca0598 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sun, 19 Jan 2025 18:46:09 +0300 Subject: [PATCH 112/161] dev: streamline reconcile --- pkg/controller/chi/worker-chi-reconciler.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go index 686aa7a1f..773cb2f03 100644 --- a/pkg/controller/chi/worker-chi-reconciler.go +++ b/pkg/controller/chi/worker-chi-reconciler.go @@ -743,15 +743,13 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error { metrics.HostReconcilesErrors(ctx, host.GetCR()) w.a.V(1). M(host).F(). - Warning("Reconcile Host interrupted with an error 2. Host: %s Err: %v", host.GetName(), err) + Warning("Reconcile Host Main interrupted with an error 1. Host: %s Err: %v", host.GetName(), err) return err } w.setHasData(host) - w.a.V(1). - M(host).F(). - Info("Reconcile PVCs and check possible data loss for host: %s", host.GetName()) + w.a.V(1).M(host).F().Info("Reconcile PVCs and data loss for host: %s", host.GetName()) if storage.ErrIsDataLoss(w.reconcileHostPVCs(ctx, host)) { stsReconcileOpts, migrateTableOpts = w.reconcileHostPVCsDataLossDetected(host) @@ -764,7 +762,7 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error { metrics.HostReconcilesErrors(ctx, host.GetCR()) w.a.V(1). M(host).F(). - Warning("Reconcile Host interrupted with an error 3. Host: %s Err: %v", host.GetName(), err) + Warning("Reconcile Host Main interrupted with an error 2. Host: %s Err: %v", host.GetName(), err) return err } From c648b808bb8788715b12ec4a73e7b7aca9dd2b85 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sun, 19 Jan 2025 19:08:53 +0300 Subject: [PATCH 113/161] dev: pod utils --- pkg/model/k8s/pod.go | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 pkg/model/k8s/pod.go diff --git a/pkg/model/k8s/pod.go b/pkg/model/k8s/pod.go new file mode 100644 index 000000000..13e3678df --- /dev/null +++ b/pkg/model/k8s/pod.go @@ -0,0 +1,33 @@ +// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
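+
+// pod.go holds read-only helpers over core/v1 Pod status; PodRestartCountersGet
+// below snapshots per-container restart counters for pod restart detection.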
+ +package k8s + +import ( + core "k8s.io/api/core/v1" +) + +func PodRestartCountersGet(pod *core.Pod) map[string]int { + if pod == nil { + return nil + } + if len(pod.Status.ContainerStatuses) < 1 { + return nil + } + res := map[string]int{} + for _, containerStatus := range pod.Status.ContainerStatuses { + res[containerStatus.Name] = int(containerStatus.RestartCount) + } + return res +} From a11a2e6028e67dcdba09deb48c9393fc7517f6cd Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sun, 19 Jan 2025 19:09:08 +0300 Subject: [PATCH 114/161] dev: switch to pod utils --- pkg/controller/chi/kube/pod.go | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/pkg/controller/chi/kube/pod.go b/pkg/controller/chi/kube/pod.go index 463305cf3..e4d0da74f 100644 --- a/pkg/controller/chi/kube/pod.go +++ b/pkg/controller/chi/kube/pod.go @@ -16,6 +16,7 @@ package kube import ( "context" + "github.com/altinity/clickhouse-operator/pkg/model/k8s" apps "k8s.io/api/apps/v1" core "k8s.io/api/core/v1" @@ -72,14 +73,7 @@ func (c *Pod) GetRestartCounters(params ...any) (map[string]int, error) { if err != nil { return nil, err } - if len(pod.Status.ContainerStatuses) < 1 { - return nil, nil - } - res := map[string]int{} - for _, containerStatus := range pod.Status.ContainerStatuses { - res[containerStatus.Name] = int(containerStatus.RestartCount) - } - return res, nil + return k8s.PodRestartCountersGet(pod), nil } // GetAll gets all pods for provided entity From 221fc5899f158a69e36383d145d10fbedd58ad2b Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sun, 19 Jan 2025 19:12:05 +0300 Subject: [PATCH 115/161] dev: format --- pkg/controller/chi/kube/pod.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/chi/kube/pod.go b/pkg/controller/chi/kube/pod.go index e4d0da74f..ebf6764fd 100644 --- a/pkg/controller/chi/kube/pod.go +++ b/pkg/controller/chi/kube/pod.go @@ -16,7 +16,6 @@ package kube import ( "context" - "github.com/altinity/clickhouse-operator/pkg/model/k8s" apps "k8s.io/api/apps/v1" core "k8s.io/api/core/v1" @@ -25,6 +24,7 @@ import ( api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" "github.com/altinity/clickhouse-operator/pkg/controller" "github.com/altinity/clickhouse-operator/pkg/interfaces" + "github.com/altinity/clickhouse-operator/pkg/model/k8s" ) type Pod struct { From ec984d16d4d9105c43ae183cde93de7394434d62 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sun, 19 Jan 2025 21:26:09 +0300 Subject: [PATCH 116/161] dev: force restart scaffolding --- pkg/controller/chi/worker-chi-reconciler.go | 31 +++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go index 773cb2f03..029610766 100644 --- a/pkg/controller/chi/worker-chi-reconciler.go +++ b/pkg/controller/chi/worker-chi-reconciler.go @@ -358,6 +358,37 @@ func (w *worker) reconcileHostStatefulSet(ctx context.Context, host *api.Host, o return err } +func (w *worker) hostForceRestart(ctx context.Context, host *api.Host) error { + err := w.hostRestart(ctx, host) + if err != nil { + return err + } + + err = w.hostScaleDown() + if err != nil { + return err + } + + return nil +} + +func (w *worker) hostRestart( ctx context.Context, host *api.Host) error { + restarts, err := w.c.kube.Pod().(interfaces.IKubePodEx).GetRestartCounters(host) + if err != nil { + return err + } + ... 
+ err = w.waitHostRestart(ctx, host, restarts) + if err != nil { + return err + } + return nil +} + +func (w *worker) hostScaleDown() error { + return nil +} + func (w *worker) getHostSoftwareVersion(ctx context.Context, host *api.Host) string { version, _ := w.getHostClickHouseVersion( ctx, From 7db11f68c4109e9c02824bb78e03a9e6ee10cb69 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sun, 19 Jan 2025 21:49:15 +0300 Subject: [PATCH 117/161] dev: restart scaffolding --- pkg/controller/chi/worker-chi-reconciler.go | 44 ++++++++++++--------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go index 029610766..9fc41f9c8 100644 --- a/pkg/controller/chi/worker-chi-reconciler.go +++ b/pkg/controller/chi/worker-chi-reconciler.go @@ -323,15 +323,10 @@ func (w *worker) reconcileHostStatefulSet(ctx context.Context, host *api.Host, o host.Runtime.CurStatefulSet, _ = w.c.kube.STS().Get(ctx, host) w.a.V(1).M(host).F().Info("Reconcile host: %s. App version: %s", host.GetName(), version) - // In case we have to force-restart host - // We'll do it via replicas: 0 in StatefulSet. + + // Start with force-restart host if w.shouldForceRestartHost(host) { - w.a.V(1).M(host).F().Info("Reconcile host. Shutting down due to force restart: %s", host.GetName()) - w.stsReconciler.PrepareHostStatefulSetWithStatus(ctx, host, true) - _ = w.stsReconciler.ReconcileStatefulSet(ctx, host, false, opts) - metrics.HostReconcilesRestart(ctx, host.GetCR()) - // At this moment StatefulSet has 0 replicas. - // First stage of RollingUpdate completed. + _ = w.hostForceRestart(ctx, host, opts) } // We are in place, where we can reconcile StatefulSet to desired configuration. @@ -358,34 +353,47 @@ func (w *worker) reconcileHostStatefulSet(ctx context.Context, host *api.Host, o return err } -func (w *worker) hostForceRestart(ctx context.Context, host *api.Host) error { - err := w.hostRestart(ctx, host) - if err != nil { - return err - } +func (w *worker) hostForceRestart(ctx context.Context, host *api.Host, opts *statefulset.ReconcileOptions) error { + w.a.V(1).M(host).F().Info("Reconcile host. Force restart: %s", host.GetName()) - err = w.hostScaleDown() - if err != nil { - return err + if w.hostSoftwareRestart(ctx, host) != nil { + _ = w.hostScaleDown(ctx, host, opts) } + metrics.HostReconcilesRestart(ctx, host.GetCR()) return nil } -func (w *worker) hostRestart( ctx context.Context, host *api.Host) error { +func (w *worker) hostSoftwareRestart( ctx context.Context, host *api.Host) error { + w.a.V(1).M(host).F().Info("Reconcile host. Host software restart: %s", host.GetName()) + restarts, err := w.c.kube.Pod().(interfaces.IKubePodEx).GetRestartCounters(host) if err != nil { + w.a.V(1).M(host).F().Info("Host software restart abort 1. Host: %s err: %v", host.GetName(), err) return err } ... err = w.waitHostRestart(ctx, host, restarts) if err != nil { + w.a.V(1).M(host).F().Info("Host software restart abort 2. Host: %s err: %v", host.GetName(), err) return err } + + w.a.V(1).M(host).F().Info("Host software restart success. Host: %s", host.GetName()) return nil } -func (w *worker) hostScaleDown() error { +func (w *worker) hostScaleDown(ctx context.Context, host *api.Host, opts *statefulset.ReconcileOptions) error { + w.a.V(1).M(host).F().Info("Reconcile host. 
Host shutdown via scale down: %s", host.GetName()) + + w.stsReconciler.PrepareHostStatefulSetWithStatus(ctx, host, true) + err := w.stsReconciler.ReconcileStatefulSet(ctx, host, false, opts) + if err != nil { + w.a.V(1).M(host).F().Info("Host shutdown abort 1. Host: %s err: %v", host.GetName(), err) + return err + } + + w.a.V(1).M(host).F().Info("Host shutdown success. Host: %s", host.GetName()) return nil } From cc69f3c91ef80de5f7e292b96b2d6433b1f12a84 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Sun, 19 Jan 2025 21:56:28 +0300 Subject: [PATCH 118/161] dev: host clichkuse software restart scaffolding --- pkg/controller/chi/worker-chi-reconciler.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go index 9fc41f9c8..0abb1dbb7 100644 --- a/pkg/controller/chi/worker-chi-reconciler.go +++ b/pkg/controller/chi/worker-chi-reconciler.go @@ -372,13 +372,19 @@ func (w *worker) hostSoftwareRestart( ctx context.Context, host *api.Host) erro w.a.V(1).M(host).F().Info("Host software restart abort 1. Host: %s err: %v", host.GetName(), err) return err } - ... - err = w.waitHostRestart(ctx, host, restarts) + + err = w.ensureClusterSchemer(host).HostClickHouseRestart(ctx, host) if err != nil { w.a.V(1).M(host).F().Info("Host software restart abort 2. Host: %s err: %v", host.GetName(), err) return err } + err = w.waitHostRestart(ctx, host, restarts) + if err != nil { + w.a.V(1).M(host).F().Info("Host software restart abort 3. Host: %s err: %v", host.GetName(), err) + return err + } + w.a.V(1).M(host).F().Info("Host software restart success. Host: %s", host.GetName()) return nil } From becaa2a8492d49bb43950f56e02d0d078da91b19 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Mon, 20 Jan 2025 14:04:04 +0300 Subject: [PATCH 119/161] dev: add sql --- pkg/model/chi/schemer/sql.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/model/chi/schemer/sql.go b/pkg/model/chi/schemer/sql.go index 281448358..5cdf88b97 100644 --- a/pkg/model/chi/schemer/sql.go +++ b/pkg/model/chi/schemer/sql.go @@ -257,3 +257,7 @@ func (s *ClusterSchemer) sqlHostInCluster(cluster string) string { cluster, ) } + +func (s *ClusterSchemer) sqlShutDown() []string { + return []string{"SYSTEM SHUTDOWN"} +} From df4220fc1f9fbf92851246b83eaaa88996fb3e1d Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Mon, 20 Jan 2025 14:04:25 +0300 Subject: [PATCH 120/161] dev: introduce shutdown schemer --- pkg/model/chi/schemer/schemer.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/model/chi/schemer/schemer.go b/pkg/model/chi/schemer/schemer.go index 34e6350f0..9831ec519 100644 --- a/pkg/model/chi/schemer/schemer.go +++ b/pkg/model/chi/schemer/schemer.go @@ -152,6 +152,12 @@ func (s *ClusterSchemer) HostClickHouseVersion(ctx context.Context, host *api.Ho return s.QueryHostString(ctx, host, s.sqlVersion()) } +// HostShutdown shutdown a host +func (s *ClusterSchemer) HostShutdown(ctx context.Context, host *api.Host) error { + log.V(1).M(host).F().Info("Host shutdown: %s", host.GetName()) + return s.ExecHost(ctx, host, s.sqlShutDown(), clickhouse.NewQueryOptions().SetRetry(false)) +} + func debugCreateSQLs(names, sqls []string, err error) ([]string, []string) { if err != nil { log.V(1).Warning("got error: %v", err) From 38f132890b367a406784f25c2902f05dd65dae03 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Mon, 20 Jan 2025 14:04:39 +0300 Subject: [PATCH 121/161] dev: call shutdown --- 
pkg/controller/chi/worker-chi-reconciler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go index 0abb1dbb7..2e4853f00 100644 --- a/pkg/controller/chi/worker-chi-reconciler.go +++ b/pkg/controller/chi/worker-chi-reconciler.go @@ -373,7 +373,7 @@ func (w *worker) hostSoftwareRestart( ctx context.Context, host *api.Host) erro return err } - err = w.ensureClusterSchemer(host).HostClickHouseRestart(ctx, host) + err = w.ensureClusterSchemer(host).HostShutdown(ctx, host) if err != nil { w.a.V(1).M(host).F().Info("Host software restart abort 2. Host: %s err: %v", host.GetName(), err) return err From 7b5b761b7c551c8d9c3cbedb7ab0bca1140fd86b Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Mon, 20 Jan 2025 14:04:58 +0300 Subject: [PATCH 122/161] dev: format --- pkg/controller/chi/worker-chi-reconciler.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-chi-reconciler.go index 2e4853f00..3fd1683d2 100644 --- a/pkg/controller/chi/worker-chi-reconciler.go +++ b/pkg/controller/chi/worker-chi-reconciler.go @@ -353,7 +353,7 @@ func (w *worker) reconcileHostStatefulSet(ctx context.Context, host *api.Host, o return err } -func (w *worker) hostForceRestart(ctx context.Context, host *api.Host, opts *statefulset.ReconcileOptions) error { +func (w *worker) hostForceRestart(ctx context.Context, host *api.Host, opts *statefulset.ReconcileOptions) error { w.a.V(1).M(host).F().Info("Reconcile host. Force restart: %s", host.GetName()) if w.hostSoftwareRestart(ctx, host) != nil { @@ -364,7 +364,7 @@ func (w *worker) hostForceRestart(ctx context.Context, host *api.Host, opts *st return nil } -func (w *worker) hostSoftwareRestart( ctx context.Context, host *api.Host) error { +func (w *worker) hostSoftwareRestart(ctx context.Context, host *api.Host) error { w.a.V(1).M(host).F().Info("Reconcile host. Host software restart: %s", host.GetName()) restarts, err := w.c.kube.Pod().(interfaces.IKubePodEx).GetRestartCounters(host) @@ -389,7 +389,7 @@ func (w *worker) hostSoftwareRestart( ctx context.Context, host *api.Host) erro return nil } -func (w *worker) hostScaleDown(ctx context.Context, host *api.Host, opts *statefulset.ReconcileOptions) error { +func (w *worker) hostScaleDown(ctx context.Context, host *api.Host, opts *statefulset.ReconcileOptions) error { w.a.V(1).M(host).F().Info("Reconcile host. 
Host shutdown via scale down: %s", host.GetName()) w.stsReconciler.PrepareHostStatefulSetWithStatus(ctx, host, true) From ac10ae5576d5e3d49edb9dd0ae670ca43a8e3ed3 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Mon, 20 Jan 2025 14:08:09 +0300 Subject: [PATCH 123/161] dev: naming --- pkg/interfaces/{files_group_type.go => files_group.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pkg/interfaces/{files_group_type.go => files_group.go} (100%) diff --git a/pkg/interfaces/files_group_type.go b/pkg/interfaces/files_group.go similarity index 100% rename from pkg/interfaces/files_group_type.go rename to pkg/interfaces/files_group.go From e2b5e6c75104fc55d7c4bb91ea1f0a21b9773871 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Mon, 20 Jan 2025 14:12:02 +0300 Subject: [PATCH 124/161] dev: naming --- .../chi/{worker-chi-reconciler.go => worker-reconciler-chi.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pkg/controller/chi/{worker-chi-reconciler.go => worker-reconciler-chi.go} (100%) diff --git a/pkg/controller/chi/worker-chi-reconciler.go b/pkg/controller/chi/worker-reconciler-chi.go similarity index 100% rename from pkg/controller/chi/worker-chi-reconciler.go rename to pkg/controller/chi/worker-reconciler-chi.go From 775ee47261c1d20f3cc92e67c01b86671e2f990c Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Mon, 20 Jan 2025 14:12:27 +0300 Subject: [PATCH 125/161] dev: naming --- .../chi/{worker-chit-reconciler.go => worker-reconciler-chit.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pkg/controller/chi/{worker-chit-reconciler.go => worker-reconciler-chit.go} (100%) diff --git a/pkg/controller/chi/worker-chit-reconciler.go b/pkg/controller/chi/worker-reconciler-chit.go similarity index 100% rename from pkg/controller/chi/worker-chit-reconciler.go rename to pkg/controller/chi/worker-reconciler-chit.go From a743652e7e3247d182dc889a7bd01029a4aa2f89 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Mon, 20 Jan 2025 14:17:26 +0300 Subject: [PATCH 126/161] dev: introduce reconcile helpers --- .../chi/worker-reconciler-helper.go | 170 ++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 pkg/controller/chi/worker-reconciler-helper.go diff --git a/pkg/controller/chi/worker-reconciler-helper.go b/pkg/controller/chi/worker-reconciler-helper.go new file mode 100644 index 000000000..f84df5314 --- /dev/null +++ b/pkg/controller/chi/worker-reconciler-helper.go @@ -0,0 +1,170 @@ +// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
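+//
+// NOTE: This file collects reconcile helpers split out of the main CHI
+// reconciler: host software version lookup, shard concurrency sizing, the
+// concurrent shard runners, and the data-loss recovery options. The move is
+// intended to be mechanical, with no behavior change.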
+ +package chi + +import ( + "context" + api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" + "github.com/altinity/clickhouse-operator/pkg/chop" + "github.com/altinity/clickhouse-operator/pkg/controller/common" + "github.com/altinity/clickhouse-operator/pkg/controller/common/statefulset" + "github.com/altinity/clickhouse-operator/pkg/util" + "math" + "sync" +) + +func (w *worker) getHostSoftwareVersion(ctx context.Context, host *api.Host) string { + version, _ := w.getHostClickHouseVersion( + ctx, + host, + versionOptions{ + skipNew: true, + skipStoppedAncestor: true, + }, + ) + return version +} + +// getReconcileShardsWorkersNum calculates how many workers are allowed to be used for concurrent shard reconcile +func (w *worker) getReconcileShardsWorkersNum(shards []*api.ChiShard, opts *common.ReconcileShardsAndHostsOptions) int { + availableWorkers := float64(chop.Config().Reconcile.Runtime.ReconcileShardsThreadsNumber) + maxConcurrencyPercent := float64(chop.Config().Reconcile.Runtime.ReconcileShardsMaxConcurrencyPercent) + _100Percent := float64(100) + shardsNum := float64(len(shards)) + + if opts.FullFanOut { + // For full fan-out scenarios use all available workers. + // Always allow at least 1 worker. + return int(math.Max(availableWorkers, 1)) + } + + // For non-full fan-out scenarios respect .Reconcile.Runtime.ReconcileShardsMaxConcurrencyPercent. + // Always allow at least 1 worker. + maxAllowedWorkers := math.Max(math.Round((maxConcurrencyPercent/_100Percent)*shardsNum), 1) + return int(math.Min(availableWorkers, maxAllowedWorkers)) +} + +func (w *worker) reconcileShardsAndHostsFetchOpts(ctx context.Context) *common.ReconcileShardsAndHostsOptions { + // Try to fetch options + if opts, ok := ctx.Value(common.ReconcileShardsAndHostsOptionsCtxKey).(*common.ReconcileShardsAndHostsOptions); ok { + w.a.V(1).Info("found ReconcileShardsAndHostsOptionsCtxKey") + return opts + } else { + w.a.V(1).Info("not found ReconcileShardsAndHostsOptionsCtxKey, use empty opts") + return &common.ReconcileShardsAndHostsOptions{} + } +} + +func (w *worker) runConcurrently(ctx context.Context, workersNum int, startShardIndex int, shards []*api.ChiShard) error { + if len(shards) == 0 { + return nil + } + + type shardReconcile struct { + shard *api.ChiShard + index int + } + + ch := make(chan *shardReconcile) + wg := sync.WaitGroup{} + + // Launch tasks feeder + wg.Add(1) + go func() { + defer wg.Done() + defer close(ch) + for i, shard := range shards { + ch <- &shardReconcile{ + shard, + startShardIndex + i, + } + } + }() + + // Launch workers + var err error + var errLock sync.Mutex + for i := 0; i < workersNum; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for rq := range ch { + w.a.V(1).Info("Starting shard index: %d on worker", rq.index) + if e := w.reconcileShardWithHosts(ctx, rq.shard); e != nil { + errLock.Lock() + err = e + errLock.Unlock() + } + } + }() + } + + w.a.V(1).Info("Starting to wait shards from index: %d on workers.", startShardIndex) + wg.Wait() + w.a.V(1).Info("Finished to wait shards from index: %d on workers.", startShardIndex) + return err +} + +func (w *worker) runConcurrentlyInBatches(ctx context.Context, workersNum int, start int, shards []*api.ChiShard) error { + for startShardIndex := 0; startShardIndex < len(shards); startShardIndex += workersNum { + endShardIndex := util.IncTopped(startShardIndex, workersNum, len(shards)) + concurrentlyProcessedShards := shards[startShardIndex:endShardIndex] + w.a.V(1).Info("Starting shards from index: %d 
on workers. Shards indexes [%d:%d)", start+startShardIndex, start+startShardIndex, start+endShardIndex) + + // Processing error protected with mutex + var err error + var errLock sync.Mutex + + wg := sync.WaitGroup{} + wg.Add(len(concurrentlyProcessedShards)) + // Launch shard concurrent processing + for j := range concurrentlyProcessedShards { + shard := concurrentlyProcessedShards[j] + w.a.V(1).Info("Starting shard on worker. Shard index: %d", start+startShardIndex+j) + go func() { + defer wg.Done() + w.a.V(1).Info("Starting shard on goroutine. Shard index: %d", start+startShardIndex+j) + if e := w.reconcileShardWithHosts(ctx, shard); e != nil { + errLock.Lock() + err = e + errLock.Unlock() + } + w.a.V(1).Info("Finished shard on goroutine. Shard index: %d", start+startShardIndex+j) + }() + } + w.a.V(1).Info("Starting to wait shards from index: %d on workers. Shards indexes [%d:%d)", start+startShardIndex, start+startShardIndex, start+endShardIndex) + wg.Wait() + w.a.V(1).Info("Finished to wait shards from index: %d on workers. Shards indexes [%d:%d)", start+startShardIndex, start+startShardIndex, start+endShardIndex) + if err != nil { + w.a.V(1).Warning("Skipping rest of shards due to an error: %v", err) + return err + } + } + return nil +} + +func (w *worker) hostPVCsDataLossDetected(host *api.Host) (*statefulset.ReconcileOptions, *migrateTableOptions) { + w.a.V(1). + M(host).F(). + Info("Data loss detected for host: %s. Will do force data recovery", host.GetName()) + + // In case of data loss detection on existing volumes, we need to: + // 1. recreate StatefulSet + // 2. run tables migration again + return statefulset.NewReconcileStatefulSetOptions().SetForceRecreate(), &migrateTableOptions{ + forceMigrate: true, + dropReplica: true, + } +} From 3af5fe3ba65e3a373e16c7bf1b175ab90f1774b1 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Mon, 20 Jan 2025 14:17:40 +0300 Subject: [PATCH 127/161] dev: switch to helpers --- pkg/controller/chi/worker-reconciler-chi.go | 133 -------------------- 1 file changed, 133 deletions(-) diff --git a/pkg/controller/chi/worker-reconciler-chi.go b/pkg/controller/chi/worker-reconciler-chi.go index 3fd1683d2..e04b22f89 100644 --- a/pkg/controller/chi/worker-reconciler-chi.go +++ b/pkg/controller/chi/worker-reconciler-chi.go @@ -17,14 +17,11 @@ package chi import ( "context" "errors" - "math" - "sync" "time" log "github.com/altinity/clickhouse-operator/pkg/announcer" api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" "github.com/altinity/clickhouse-operator/pkg/apis/common/types" - "github.com/altinity/clickhouse-operator/pkg/chop" "github.com/altinity/clickhouse-operator/pkg/controller/chi/metrics" "github.com/altinity/clickhouse-operator/pkg/controller/common" a "github.com/altinity/clickhouse-operator/pkg/controller/common/announcer" @@ -403,18 +400,6 @@ func (w *worker) hostScaleDown(ctx context.Context, host *api.Host, opts *statef return nil } -func (w *worker) getHostSoftwareVersion(ctx context.Context, host *api.Host) string { - version, _ := w.getHostClickHouseVersion( - ctx, - host, - versionOptions{ - skipNew: true, - skipStoppedAncestor: true, - }, - ) - return version -} - // reconcileHostService reconciles host's Service func (w *worker) reconcileHostService(ctx context.Context, host *api.Host) error { if util.IsContextDone(ctx) { @@ -484,36 +469,6 @@ func (w *worker) reconcileClusterSecret(ctx context.Context, cluster *api.Cluste } } -// getReconcileShardsWorkersNum calculates how many workers are allowed to 
be used for concurrent shard reconcile -func (w *worker) getReconcileShardsWorkersNum(shards []*api.ChiShard, opts *common.ReconcileShardsAndHostsOptions) int { - availableWorkers := float64(chop.Config().Reconcile.Runtime.ReconcileShardsThreadsNumber) - maxConcurrencyPercent := float64(chop.Config().Reconcile.Runtime.ReconcileShardsMaxConcurrencyPercent) - _100Percent := float64(100) - shardsNum := float64(len(shards)) - - if opts.FullFanOut { - // For full fan-out scenarios use all available workers. - // Always allow at least 1 worker. - return int(math.Max(availableWorkers, 1)) - } - - // For non-full fan-out scenarios respect .Reconcile.Runtime.ReconcileShardsMaxConcurrencyPercent. - // Always allow at least 1 worker. - maxAllowedWorkers := math.Max(math.Round((maxConcurrencyPercent/_100Percent)*shardsNum), 1) - return int(math.Min(availableWorkers, maxAllowedWorkers)) -} - -func (w *worker) reconcileShardsAndHostsFetchOpts(ctx context.Context) *common.ReconcileShardsAndHostsOptions { - // Try to fetch options - if opts, ok := ctx.Value(common.ReconcileShardsAndHostsOptionsCtxKey).(*common.ReconcileShardsAndHostsOptions); ok { - w.a.V(1).Info("found ReconcileShardsAndHostsOptionsCtxKey") - return opts - } else { - w.a.V(1).Info("not found ReconcileShardsAndHostsOptionsCtxKey, use empty opts") - return &common.ReconcileShardsAndHostsOptions{} - } -} - // reconcileShardsAndHosts reconciles shards and hosts of each shard func (w *worker) reconcileShardsAndHosts(ctx context.Context, shards []*api.ChiShard) error { // Sanity check - has to have shard(s) @@ -558,94 +513,6 @@ func (w *worker) reconcileShardsAndHosts(ctx context.Context, shards []*api.ChiS return nil } -func (w *worker) runConcurrently(ctx context.Context, workersNum int, startShardIndex int, shards []*api.ChiShard) error { - if len(shards) == 0 { - return nil - } - - type shardReconcile struct { - shard *api.ChiShard - index int - } - - ch := make(chan *shardReconcile) - wg := sync.WaitGroup{} - - // Launch tasks feeder - wg.Add(1) - go func() { - defer wg.Done() - defer close(ch) - for i, shard := range shards { - ch <- &shardReconcile{ - shard, - startShardIndex + i, - } - } - }() - - // Launch workers - var err error - var errLock sync.Mutex - for i := 0; i < workersNum; i++ { - wg.Add(1) - go func() { - defer wg.Done() - for rq := range ch { - w.a.V(1).Info("Starting shard index: %d on worker", rq.index) - if e := w.reconcileShardWithHosts(ctx, rq.shard); e != nil { - errLock.Lock() - err = e - errLock.Unlock() - } - } - }() - } - - w.a.V(1).Info("Starting to wait shards from index: %d on workers.", startShardIndex) - wg.Wait() - w.a.V(1).Info("Finished to wait shards from index: %d on workers.", startShardIndex) - return err -} - -func (w *worker) runConcurrentlyInBatches(ctx context.Context, workersNum int, start int, shards []*api.ChiShard) error { - for startShardIndex := 0; startShardIndex < len(shards); startShardIndex += workersNum { - endShardIndex := util.IncTopped(startShardIndex, workersNum, len(shards)) - concurrentlyProcessedShards := shards[startShardIndex:endShardIndex] - w.a.V(1).Info("Starting shards from index: %d on workers. 
Shards indexes [%d:%d)", start+startShardIndex, start+startShardIndex, start+endShardIndex)
-
-		// Processing error protected with mutex
-		var err error
-		var errLock sync.Mutex
-
-		wg := sync.WaitGroup{}
-		wg.Add(len(concurrentlyProcessedShards))
-		// Launch shard concurrent processing
-		for j := range concurrentlyProcessedShards {
-			shard := concurrentlyProcessedShards[j]
-			w.a.V(1).Info("Starting shard on worker. Shard index: %d", start+startShardIndex+j)
-			go func() {
-				defer wg.Done()
-				w.a.V(1).Info("Starting shard on goroutine. Shard index: %d", start+startShardIndex+j)
-				if e := w.reconcileShardWithHosts(ctx, shard); e != nil {
-					errLock.Lock()
-					err = e
-					errLock.Unlock()
-				}
-				w.a.V(1).Info("Finished shard on goroutine. Shard index: %d", start+startShardIndex+j)
-			}()
-		}
-		w.a.V(1).Info("Starting to wait shards from index: %d on workers. Shards indexes [%d:%d)", start+startShardIndex, start+startShardIndex, start+endShardIndex)
-		wg.Wait()
-		w.a.V(1).Info("Finished to wait shards from index: %d on workers. Shards indexes [%d:%d)", start+startShardIndex, start+startShardIndex, start+endShardIndex)
-		if err != nil {
-			w.a.V(1).Warning("Skipping rest of shards due to an error: %v", err)
-			return err
-		}
-	}
-	return nil
-}
-
 func (w *worker) reconcileShardWithHosts(ctx context.Context, shard api.IShard) error {
 	if err := w.reconcileShard(ctx, shard); err != nil {
 		return err

From 3f67e010405aa5a556f453106bc71c8bd97dcaee Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Mon, 20 Jan 2025 14:21:53 +0300
Subject: [PATCH 128/161] dev: remove unnecessary parts

---
 pkg/controller/chi/worker-reconciler-chi.go | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/pkg/controller/chi/worker-reconciler-chi.go b/pkg/controller/chi/worker-reconciler-chi.go
index e04b22f89..f4c2295d0 100644
--- a/pkg/controller/chi/worker-reconciler-chi.go
+++ b/pkg/controller/chi/worker-reconciler-chi.go
@@ -686,20 +686,6 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error {
 	return nil
 }
 
-func (w *worker) reconcileHostPVCsDataLossDetected(host *api.Host) (*statefulset.ReconcileOptions, *migrateTableOptions) {
-	w.a.V(1).
-		M(host).F().
-		Info("Data loss detected for host: %s. Will do force data recovery", host.GetName())
-
-	// In case of data loss detection on existing volumes, we need to:
-	// 1. recreate StatefulSet
-	// 2.
run tables migration again - return statefulset.NewReconcileStatefulSetOptions().SetForceRecreate(), &migrateTableOptions{ - forceMigrate: true, - dropReplica: true, - } -} - func (w *worker) reconcileHostPVCs(ctx context.Context, host *api.Host) storage.ErrorDataPersistence { return storage.NewStorageReconciler( w.task, From 24ea483f748ef1beb857a2ece1cc3ab4d83979a4 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Mon, 20 Jan 2025 14:22:13 +0300 Subject: [PATCH 129/161] dev: switch to helpers --- pkg/controller/chi/worker-reconciler-chi.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/chi/worker-reconciler-chi.go b/pkg/controller/chi/worker-reconciler-chi.go index f4c2295d0..18ac0323f 100644 --- a/pkg/controller/chi/worker-reconciler-chi.go +++ b/pkg/controller/chi/worker-reconciler-chi.go @@ -664,7 +664,7 @@ func (w *worker) reconcileHostMain(ctx context.Context, host *api.Host) error { w.a.V(1).M(host).F().Info("Reconcile PVCs and data loss for host: %s", host.GetName()) if storage.ErrIsDataLoss(w.reconcileHostPVCs(ctx, host)) { - stsReconcileOpts, migrateTableOpts = w.reconcileHostPVCsDataLossDetected(host) + stsReconcileOpts, migrateTableOpts = w.hostPVCsDataLossDetected(host) w.a.V(1). M(host).F(). Info("Data loss detected for host: %s.", host.GetName()) From ef87ff13ef95be7f3df4f60d57625b8aa582415c Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Mon, 20 Jan 2025 16:07:24 +0300 Subject: [PATCH 130/161] dev: update deps --- go.mod | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/go.mod b/go.mod index e6d1c223e..ab79839b8 100644 --- a/go.mod +++ b/go.mod @@ -46,7 +46,7 @@ require ( go.opentelemetry.io/otel/metric v1.24.0 go.opentelemetry.io/otel/sdk v1.24.0 go.opentelemetry.io/otel/sdk/metric v1.24.0 - golang.org/x/sync v0.3.0 + golang.org/x/sync v0.10.0 gopkg.in/d4l3k/messagediff.v1 v1.2.1 gopkg.in/yaml.v3 v3.0.1 sigs.k8s.io/controller-runtime v0.15.1 @@ -93,14 +93,14 @@ require ( go.uber.org/atomic v1.7.0 // indirect go.uber.org/multierr v1.6.0 // indirect go.uber.org/zap v1.24.0 // indirect - golang.org/x/mod v0.10.0 // indirect - golang.org/x/net v0.23.0 // indirect + golang.org/x/mod v0.17.0 // indirect + golang.org/x/net v0.33.0 // indirect golang.org/x/oauth2 v0.12.0 // indirect - golang.org/x/sys v0.18.0 // indirect - golang.org/x/term v0.18.0 // indirect - golang.org/x/text v0.14.0 // indirect + golang.org/x/sys v0.28.0 // indirect + golang.org/x/term v0.27.0 // indirect + golang.org/x/text v0.21.0 // indirect golang.org/x/time v0.3.0 // indirect - golang.org/x/tools v0.9.1 // indirect + golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect gomodules.xyz/jsonpatch/v2 v2.3.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/protobuf v1.33.0 // indirect From f44eb213ac1f0d4100790d4d507c8524a12e44d3 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Mon, 20 Jan 2025 16:07:45 +0300 Subject: [PATCH 131/161] dev: sum file --- go.sum | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/go.sum b/go.sum index d04b39587..945311d1e 100644 --- a/go.sum +++ b/go.sum @@ -537,8 +537,8 @@ golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod 
v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk= -golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -576,8 +576,8 @@ golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwY golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -596,8 +596,8 @@ golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= -golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -647,20 +647,20 @@ golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= -golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod 
h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
-golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8=
-golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58=
+golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q=
+golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
 golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
-golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
+golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
 golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
@@ -720,8 +720,8 @@ golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4f
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
 golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
-golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo=
-golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc=
+golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg=
+golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

From 8f5105157db6e9e7a7747569578172a02f9dc2ab Mon Sep 17 00:00:00 2001
From: alz
Date: Tue, 21 Jan 2025 11:47:54 +0300
Subject: [PATCH 132/161] Add a secret for all-sharded cluster if there is one
 defined for the first cluster.
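
With a secret configured on the first cluster, the generated remote_servers
section now carries a <secret> entry for the synthetic all-sharded cluster as
well, so distributed queries routed through it can authenticate between
nodes. The generated XML is expected to look roughly like this (a sketch,
not verbatim operator output; the value depends on the secret source):

    <all-sharded>
        <secret>plaintext-secret-value</secret>
        <shard>
            ...
        </shard>
    </all-sharded>

For secretRef and auto sources the value is wired through the operator's
internode-secret environment variable via a from_env attribute instead of a
plaintext value.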
--- pkg/model/chi/config/generator.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pkg/model/chi/config/generator.go b/pkg/model/chi/config/generator.go index 01cf41172..e7e1de340 100644 --- a/pkg/model/chi/config/generator.go +++ b/pkg/model/chi/config/generator.go @@ -338,6 +338,19 @@ func (c *Generator) getRemoteServers(selector *config.HostSelector) string { // clusterName = AllShardsOneReplicaClusterName util.Iline(b, 8, "<%s>", clusterName) + + // Add secret to all-sharded from the first cluster if present + cluster := c.cr.FindCluster(0) + // VALUE + switch cluster.GetSecret().Source() { + case chi.ClusterSecretSourcePlaintext: + // Secret value is explicitly specified + util.Iline(b, 12, "%s", cluster.GetSecret().Value) + case chi.ClusterSecretSourceSecretRef, chi.ClusterSecretSourceAuto: + // Use secret via ENV var from secret + util.Iline(b, 12, ``, InternodeClusterSecretEnvName) + } + c.cr.WalkHosts(func(host *chi.Host) error { if selector.Include(host) { // From 0b971cdee189aa38f18b00d126dbdc7a958c2cf0 Mon Sep 17 00:00:00 2001 From: alz Date: Tue, 21 Jan 2025 12:51:14 +0300 Subject: [PATCH 133/161] Extend test_039 to check for all-sharded cluster as well --- tests/e2e/test_operator.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index dfa2cceeb..ae1a786d4 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -4023,10 +4023,17 @@ def test_039(self, step=0, delete_chi=0): with Then("Select in cluster with no secret should fail"): r = clickhouse.query_with_error(chi, "SELECT count(a) FROM secure_dist", pwd="qkrq") assert "AUTHENTICATION_FAILED" in r + with And("Select from all-sharded with no secret should fail"): + r = clickhouse.query_with_error(chi, "SELECT * FROM cluster('all-sharded', system.one)", pwd="qkrq") + assert "AUTHENTICATION_FAILED" in r if step > 0: with Then("Select in cluster with secret should pass"): r = clickhouse.query(chi, "SELECT count() FROM secure_dist", pwd="qkrq") assert r == "10" + with And("Select from all-sharded with secret should pass"): + r = clickhouse.query_with_error(chi, "SELECT * FROM cluster('all-sharded', system.one) limit 1", pwd="qkrq") + assert r == "0" + if step == 4: with Then("Create replicated table to test interserver_https_port"): From 122327ea50e4d40894c6eed7d5c20b66331b052b Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 21 Jan 2025 13:51:55 +0300 Subject: [PATCH 134/161] dev: disable to streamline --- pkg/controller/chi/worker-reconciler-chi.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/controller/chi/worker-reconciler-chi.go b/pkg/controller/chi/worker-reconciler-chi.go index 18ac0323f..4f811c34e 100644 --- a/pkg/controller/chi/worker-reconciler-chi.go +++ b/pkg/controller/chi/worker-reconciler-chi.go @@ -17,6 +17,7 @@ package chi import ( "context" "errors" + "fmt" "time" log "github.com/altinity/clickhouse-operator/pkg/announcer" @@ -362,6 +363,8 @@ func (w *worker) hostForceRestart(ctx context.Context, host *api.Host, opts *sta } func (w *worker) hostSoftwareRestart(ctx context.Context, host *api.Host) error { + return fmt.Errorf("so be it") + w.a.V(1).M(host).F().Info("Reconcile host. 
Host software restart: %s", host.GetName()) restarts, err := w.c.kube.Pod().(interfaces.IKubePodEx).GetRestartCounters(host) From fe3d2d1cf837b97e6f90736b05f6d66f91d2ff88 Mon Sep 17 00:00:00 2001 From: alz Date: Tue, 21 Jan 2025 20:36:21 +0300 Subject: [PATCH 135/161] Retry failed tests for a second time --- .github/workflows/run_tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml index 94defc7b3..f62fc1c2a 100644 --- a/.github/workflows/run_tests.yaml +++ b/.github/workflows/run_tests.yaml @@ -93,7 +93,7 @@ jobs: test_mode="--test-to-end" fi - ~/venv/qa/bin/python3 ./tests/regression.py --only=/regression/e2e.test_operator/${ONLY} $test_mode --trim-results on -o short --native --log ./tests/raw.log + ~/venv/qa/bin/python3 ./tests/regression.py --only=/regression/e2e.test_operator/${ONLY} --repeat=/regression/e2e.test_operator/${ONLY},2,pass $test_mode --trim-results on -o short --native --log ./tests/raw.log test_result=$? ~/venv/qa/bin/tfs --no-colors transform compact ./tests/raw.log ./tests/compact.log ~/venv/qa/bin/tfs --no-colors transform nice ./tests/raw.log ./tests/nice.log.txt From 68a28f06d80ff8b8e0458c5302c3bb95f39096d9 Mon Sep 17 00:00:00 2001 From: alz Date: Tue, 21 Jan 2025 20:57:22 +0300 Subject: [PATCH 136/161] Remove --repeat since it does not work --- .github/workflows/run_tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml index f62fc1c2a..94defc7b3 100644 --- a/.github/workflows/run_tests.yaml +++ b/.github/workflows/run_tests.yaml @@ -93,7 +93,7 @@ jobs: test_mode="--test-to-end" fi - ~/venv/qa/bin/python3 ./tests/regression.py --only=/regression/e2e.test_operator/${ONLY} --repeat=/regression/e2e.test_operator/${ONLY},2,pass $test_mode --trim-results on -o short --native --log ./tests/raw.log + ~/venv/qa/bin/python3 ./tests/regression.py --only=/regression/e2e.test_operator/${ONLY} $test_mode --trim-results on -o short --native --log ./tests/raw.log test_result=$? ~/venv/qa/bin/tfs --no-colors transform compact ./tests/raw.log ./tests/compact.log ~/venv/qa/bin/tfs --no-colors transform nice ./tests/raw.log ./tests/nice.log.txt From 95d7f055a19e8ee99fdbe34786bd83ea8c089f68 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 21 Jan 2025 21:06:39 +0300 Subject: [PATCH 137/161] dev: enable soft restart --- pkg/controller/chi/worker-reconciler-chi.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/controller/chi/worker-reconciler-chi.go b/pkg/controller/chi/worker-reconciler-chi.go index 4f811c34e..db0ae8c18 100644 --- a/pkg/controller/chi/worker-reconciler-chi.go +++ b/pkg/controller/chi/worker-reconciler-chi.go @@ -17,7 +17,6 @@ package chi import ( "context" "errors" - "fmt" "time" log "github.com/altinity/clickhouse-operator/pkg/announcer" @@ -363,7 +362,7 @@ func (w *worker) hostForceRestart(ctx context.Context, host *api.Host, opts *sta } func (w *worker) hostSoftwareRestart(ctx context.Context, host *api.Host) error { - return fmt.Errorf("so be it") + //return fmt.Errorf("so be it") w.a.V(1).M(host).F().Info("Reconcile host. 
Host software restart: %s", host.GetName()) From b9c2787988e154768f63ab61e9ef33d94ef5a694 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 21 Jan 2025 21:10:06 +0300 Subject: [PATCH 138/161] test: fix test 16 --- tests/e2e/test_operator.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index ae1a786d4..6c4ce24f3 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -1941,7 +1941,7 @@ def test_016(self): # test-016-settings-04.yaml with When("Add new custom4.xml config file"): - start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.startTime") + start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.containerStatuses[0].state.running.startedAt") kubectl.create_and_check( manifest="manifests/chi/test-016-settings-04.yaml", check={ @@ -1962,12 +1962,12 @@ def test_016(self): assert out == "test-custom4" with And("ClickHouse SHOULD BE restarted"): - new_start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.startTime") + new_start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.containerStatuses[0].state.running.startedAt") assert start_time < new_start_time # test-016-settings-05.yaml with When("Add a change to an existing xml file"): - start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.startTime") + start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.containerStatuses[0].state.running.startedAt") kubectl.create_and_check( manifest="manifests/chi/test-016-settings-05.yaml", check={ @@ -1976,7 +1976,7 @@ def test_016(self): ) with And("ClickHouse SHOULD BE restarted"): - new_start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.startTime") + new_start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.containerStatuses[0].state.running.startedAt") assert start_time < new_start_time with And("Macro 'test' value should be changed"): @@ -1988,7 +1988,7 @@ def test_016(self): # test-016-settings-06.yaml with When("Add I change a number of settings that does not requre a restart"): - start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.startTime") + start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.containerStatuses[0].state.running.startedAt") kubectl.create_and_check( manifest="manifests/chi/test-016-settings-06.yaml", check={ @@ -1997,7 +1997,7 @@ def test_016(self): ) with And("ClickHouse SHOULD NOT BE restarted"): - new_start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.startTime") + new_start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.containerStatuses[0].state.running.startedAt") assert start_time == new_start_time with Finally("I clean up"): From aea268c4dc2767a40893196deb5f69db6b3f57b1 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Tue, 21 Jan 2025 21:10:37 +0300 Subject: [PATCH 139/161] test: typo --- tests/e2e/test_operator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index 6c4ce24f3..f78c9454f 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -1987,7 +1987,7 @@ def test_016(self): assert out == "test-changed" # test-016-settings-06.yaml - with When("Add I change a number of settings that does not requre a restart"): + with When("Add I change a number of settings that does not require a 
restart"): start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.containerStatuses[0].state.running.startedAt") kubectl.create_and_check( manifest="manifests/chi/test-016-settings-06.yaml", From 7d871193c693d64ef6ff17ccc7ff3ce69b7f3439 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Wed, 22 Jan 2025 13:13:36 +0300 Subject: [PATCH 140/161] test: 28 --- tests/e2e/test_operator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index f78c9454f..96fc57e2b 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -3064,7 +3064,7 @@ def test_028(self): out = clickhouse.query_with_error(chi, "SELECT count(sleepEachRow(1)) FROM numbers(30) SETTINGS function_sleep_max_microseconds_per_block=0") assert out == "30" - pod_start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.startTime") + pod_start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.containerStatuses[0].state.running.startedAt") with Then("Operator should start processing a change"): # TODO: Test needs to be improved kubectl.wait_chi_status(chi, "InProgress") @@ -3101,7 +3101,7 @@ def test_028(self): # print("Waiting 5 seconds") time.sleep(5) end_time = time.time() - new_pod_start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.startTime") + new_pod_start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.containerStatuses[0].state.running.startedAt") print(f"Total restart time: {str(round(end_time - start_time))}") print(f"First replica downtime: {ch1_downtime}") print(f"Second replica downtime: {ch2_downtime}") @@ -3117,7 +3117,7 @@ def test_028(self): note("Restart is cleaned automatically") else: note("Restart needs to be cleaned") - start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.startTime") + start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.containerStatuses[0].state.running.startedAt") # We need to clear RollingUpdate restart policy because of new operator's IP address emerging sometimes with Then("Clear RollingUpdate restart policy"): @@ -3139,7 +3139,7 @@ def test_028(self): with Then("Re-apply the original config. 
CHI should not be restarted"): kubectl.create_and_check(manifest=manifest, check={"do_not_delete": 1}) - new_start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.startTime") + new_start_time = kubectl.get_field("pod", f"chi-{chi}-default-0-0-0", ".status.containerStatuses[0].state.running.startedAt") print(f"old_start_time: {start_time}") print(f"new_start_time: {new_start_time}") assert start_time == new_start_time From cde3056eb194e736306713c28e49a6bef0f03852 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Thu, 23 Jan 2025 12:20:53 +0300 Subject: [PATCH 141/161] dev: add pod helpers --- pkg/model/k8s/pod.go | 63 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/pkg/model/k8s/pod.go b/pkg/model/k8s/pod.go index 13e3678df..07f54bb74 100644 --- a/pkg/model/k8s/pod.go +++ b/pkg/model/k8s/pod.go @@ -31,3 +31,66 @@ func PodRestartCountersGet(pod *core.Pod) map[string]int { } return res } + +func PodHasCrushedContainers(pod *core.Pod) bool { + // pod.Status.ContainerStatuses[0].State.Waiting.Reason + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.State.Waiting != nil { + if containerStatus.State.Waiting.Reason == "CrashLoopBackOff" { + // Crashed + return true + } + } + } + // No crashed + return false +} + +func PodHasNotReadyContainers(pod *core.Pod) bool { + for _, containerStatus := range pod.Status.ContainerStatuses { + if !containerStatus.Ready { + // Not ready + return true + } + } + // All are ready + return false +} + +func PodHasNotStartedContainers(pod *core.Pod) bool { + res := true + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.Started != nil { + if *containerStatus.Started { + // Started + continue + } + } + // Not started + res = false + } + return res +} + +func PodPhaseIsRunning(pod *core.Pod) bool { + return pod.Status.Phase == core.PodRunning +} + +func IsPodOK(pod *core.Pod) bool { + if len(pod.Status.ContainerStatuses) < 1 { + return false + } + if PodHasCrushedContainers(pod) { + return false + } + if PodHasNotReadyContainers(pod) { + return false + } + if PodHasNotStartedContainers(pod) { + return false + } + if !PodPhaseIsRunning(pod) { + return false + } + return true +} From 40c99c94594d009ca5003dda5500603058516773 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Thu, 23 Jan 2025 12:21:17 +0300 Subject: [PATCH 142/161] dev: log messages --- pkg/controller/common/statefulset/statefulset-reconciler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/common/statefulset/statefulset-reconciler.go b/pkg/controller/common/statefulset/statefulset-reconciler.go index 437ec0d31..70ebd3805 100644 --- a/pkg/controller/common/statefulset/statefulset-reconciler.go +++ b/pkg/controller/common/statefulset/statefulset-reconciler.go @@ -313,7 +313,7 @@ func (r *Reconciler) createStatefulSet(ctx context.Context, host *api.Host, regi WithEvent(host.GetCR(), a.EventActionCreate, a.EventReasonCreateStarted). WithAction(host.GetCR()). M(host).F(). 
-		Info("Create StatefulSet %s - started", util.NamespaceNameString(statefulSet))
+		Info("Create StatefulSet: %s - started", util.NamespaceNameString(statefulSet))
 
 	action := r.doCreateStatefulSet(ctx, host, opts)
 

From 28ae1daf97ec01ab3d4c108f38ed6cb8d3910ed7 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Thu, 23 Jan 2025 12:22:28 +0300
Subject: [PATCH 143/161] dev: add waiter functions

---
 .../worker-wait-exclude-include-restart.go    | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/pkg/controller/chi/worker-wait-exclude-include-restart.go b/pkg/controller/chi/worker-wait-exclude-include-restart.go
index effe0ffc5..e298d8e57 100644
--- a/pkg/controller/chi/worker-wait-exclude-include-restart.go
+++ b/pkg/controller/chi/worker-wait-exclude-include-restart.go
@@ -366,3 +366,33 @@ func (w *worker) waitHostRestart(ctx context.Context, host *api.Host, start map[
 		return true
 	})
 }
+
+// waitHostIsReady
+func (w *worker) waitHostIsReady(ctx context.Context, host *api.Host) error {
+	return domain.PollHost(ctx, host, func(ctx context.Context, host *api.Host) bool {
+		if w.isPodReady(host) {
+			return false
+		}
+		return true
+	})
+}
+
+// waitHostIsStarted
+func (w *worker) waitHostIsStarted(ctx context.Context, host *api.Host) error {
+	return domain.PollHost(ctx, host, func(ctx context.Context, host *api.Host) bool {
+		if w.isPodStarted(host) {
+			return false
+		}
+		return true
+	})
+}
+
+// waitHostIsRunning
+func (w *worker) waitHostIsRunning(ctx context.Context, host *api.Host) error {
+	return domain.PollHost(ctx, host, func(ctx context.Context, host *api.Host) bool {
+		if w.isPodRunning(host) {
+			return false
+		}
+		return true
+	})
+}

From a9623374eb37faaa8986ccfd6aa7db4149a75078 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Thu, 23 Jan 2025 12:24:28 +0300
Subject: [PATCH 144/161] dev: add pod checkers

---
 pkg/controller/chi/worker.go | 38 ++++++++++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/pkg/controller/chi/worker.go b/pkg/controller/chi/worker.go
index cbab798b4..f71a86a43 100644
--- a/pkg/controller/chi/worker.go
+++ b/pkg/controller/chi/worker.go
@@ -17,6 +17,7 @@ package chi
 import (
 	"context"
 	"errors"
+	"github.com/altinity/clickhouse-operator/pkg/model/k8s"
 	"time"
 
 	core "k8s.io/api/core/v1"
@@ -194,15 +195,36 @@ func (w *worker) shouldForceRestartHost(host *api.Host) bool {
 }
 
 func (w *worker) isPodCrushed(host *api.Host) bool {
-	// pod.Status.ContainerStatuses[0].State.Waiting.Reason
 	if pod, err := w.c.kube.Pod().Get(host); err == nil {
-		if len(pod.Status.ContainerStatuses) > 0 {
-			if pod.Status.ContainerStatuses[0].State.Waiting != nil {
-				if pod.Status.ContainerStatuses[0].State.Waiting.Reason == "CrashLoopBackOff" {
-					return true
-				}
-			}
-		}
+		return k8s.PodHasCrushedContainers(pod)
+	}
+	return true
+}
+
+func (w *worker) isPodReady(host *api.Host) bool {
+	if pod, err := w.c.kube.Pod().Get(host); err == nil {
+		return !k8s.PodHasNotReadyContainers(pod)
+	}
+	return false
+}
+
+func (w *worker) isPodStarted(host *api.Host) bool {
+	if pod, err := w.c.kube.Pod().Get(host); err == nil {
+		return !k8s.PodHasNotStartedContainers(pod)
+	}
+	return false
+}
+
+func (w *worker) isPodRunning(host *api.Host) bool {
+	if pod, err := w.c.kube.Pod().Get(host); err == nil {
+		return k8s.PodPhaseIsRunning(pod)
+	}
+	return false
+}
+
+func (w *worker) isPodOK(host *api.Host) bool {
+	if pod, err := w.c.kube.Pod().Get(host); err == nil {
+		return k8s.IsPodOK(pod)
 	}
 	return false
 }

From
0651b8db80fd3bab86554f86dcce974c2e74a6e8 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Thu, 23 Jan 2025 12:25:18 +0300
Subject: [PATCH 145/161] dev: introduce version getter for availability
 checker

---
 pkg/controller/chi/worker-reconciler-helper.go | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/pkg/controller/chi/worker-reconciler-helper.go b/pkg/controller/chi/worker-reconciler-helper.go
index f84df5314..1dbb5aa7f 100644
--- a/pkg/controller/chi/worker-reconciler-helper.go
+++ b/pkg/controller/chi/worker-reconciler-helper.go
@@ -37,6 +37,20 @@ func (w *worker) getHostSoftwareVersion(ctx context.Context, host *api.Host) str
 	return version
 }
 
+func (w *worker) getHostSoftwareVersionErr(ctx context.Context, host *api.Host) error {
+	version, err := w.getHostClickHouseVersion(
+		ctx,
+		host,
+		versionOptions{},
+	)
+	if err == nil {
+		w.a.V(1).M(host).F().Info("Host software version detected. Host: %s version: %s", host.GetName(), version)
+	} else {
+		w.a.V(1).M(host).F().Info("Host software version NOT detected. Host: %s Err: %v", host.GetName(), err)
+	}
+	return err
+}
+
 // getReconcileShardsWorkersNum calculates how many workers are allowed to be used for concurrent shard reconcile
 func (w *worker) getReconcileShardsWorkersNum(shards []*api.ChiShard, opts *common.ReconcileShardsAndHostsOptions) int {
 	availableWorkers := float64(chop.Config().Reconcile.Runtime.ReconcileShardsThreadsNumber)

From 7123293656d7d772dfaa2e4b17a318204d8b3934 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Thu, 23 Jan 2025 12:26:09 +0300
Subject: [PATCH 146/161] dev: soft restart polishing

---
 pkg/controller/chi/worker-reconciler-chi.go | 49 +++++++++++++++++++--
 1 file changed, 45 insertions(+), 4 deletions(-)

diff --git a/pkg/controller/chi/worker-reconciler-chi.go b/pkg/controller/chi/worker-reconciler-chi.go
index db0ae8c18..a12a496ec 100644
--- a/pkg/controller/chi/worker-reconciler-chi.go
+++ b/pkg/controller/chi/worker-reconciler-chi.go
@@ -17,6 +17,7 @@ package chi
 import (
 	"context"
 	"errors"
+	"fmt"
 	"time"
 
 	log "github.com/altinity/clickhouse-operator/pkg/announcer"
@@ -353,7 +354,7 @@ func (w *worker) reconcileHostStatefulSet(ctx context.Context, host *api.Host, o
 func (w *worker) hostForceRestart(ctx context.Context, host *api.Host, opts *statefulset.ReconcileOptions) error {
 	w.a.V(1).M(host).F().Info("Reconcile host. Force restart: %s", host.GetName())
 
-	if w.hostSoftwareRestart(ctx, host) != nil {
+	if host.IsStopped() || (w.hostSoftwareRestart(ctx, host) != nil) {
 		_ = w.hostScaleDown(ctx, host, opts)
 	}
 
@@ -362,9 +363,7 @@ func (w *worker) hostForceRestart(ctx context.Context, host *api.Host, opts *sta
 }
 
 func (w *worker) hostSoftwareRestart(ctx context.Context, host *api.Host) error {
-	//return fmt.Errorf("so be it")
-
-	w.a.V(1).M(host).F().Info("Reconcile host. Host software restart: %s", host.GetName())
+	w.a.V(1).M(host).F().Info("Host software restart start. Host: %s", host.GetName())
 
 	restarts, err := w.c.kube.Pod().(interfaces.IKubePodEx).GetRestartCounters(host)
 	if err != nil {
@@ -377,12 +376,54 @@ func (w *worker) hostSoftwareRestart(ctx context.Context, host *api.Host) error
 		w.a.V(1).M(host).F().Info("Host software restart abort 2. Host: %s err: %v", host.GetName(), err)
 		return err
 	}
+	w.a.V(1).M(host).F().Info("Host software shutdown ok. Host: %s", host.GetName())
 
 	err = w.waitHostRestart(ctx, host, restarts)
 	if err != nil {
 		w.a.V(1).M(host).F().Info("Host software restart abort 3.
Host: %s err: %v", host.GetName(), err) return err } + w.a.V(1).M(host).F().Info("Host software restart ok. Host: %s", host.GetName()) + + err = w.waitHostIsStarted(ctx, host) + if err != nil { + w.a.V(1).M(host).F().Info("Host software restart abort 4. Host: %s is not started", host.GetName()) + return fmt.Errorf("host is not started") + } + w.a.V(1).M(host).F().Info("Host software pod is started. Host: %s ", host.GetName()) + + err = w.waitHostIsRunning(ctx, host) + if err != nil { + w.a.V(1).M(host).F().Info("Host software restart abort 5. Host: %s is not running", host.GetName()) + return fmt.Errorf("host is not running") + } + w.a.V(1).M(host).F().Info("Host software pod is running. Host: %s ", host.GetName()) + + err = w.waitHostIsReady(ctx, host) + if err != nil { + w.a.V(1).M(host).F().Info("Host software restart abort 6. Host: %s is not ready", host.GetName()) + return fmt.Errorf("host is not ready") + } + w.a.V(1).M(host).F().Info("Host software pod is ready. Host: %s ", host.GetName()) + + err = w.getHostSoftwareVersionErr(ctx, host) + if err != nil { + w.a.V(1).M(host).F().Info("Host software restart abort 7. Host: %s err: %v", host.GetName(), err) + return err + } + w.a.V(1).M(host).F().Info("Host software version ok. Host: %s ", host.GetName()) + + if w.isPodCrushed(host) { + w.a.V(1).M(host).F().Info("Host software restart abort 8. Host: %s is crushed", host.GetName()) + return fmt.Errorf("host is crushed") + } + w.a.V(1).M(host).F().Info("Host software is not crushed. Host: %s ", host.GetName()) + + if !w.isPodOK(host) { + w.a.V(1).M(host).F().Info("Host software restart abort 9. Host: %s is not ok", host.GetName()) + return fmt.Errorf("host is not ok") + } + w.a.V(1).M(host).F().Info("Host software pod is ok. Host: %s ", host.GetName()) w.a.V(1).M(host).F().Info("Host software restart success. 
Host: %s", host.GetName()) return nil From 327589d78952b7b20ede6d5add04c01a5f787221 Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Thu, 23 Jan 2025 12:27:08 +0300 Subject: [PATCH 147/161] dev: formatter --- pkg/controller/chi/worker.go | 2 +- pkg/model/k8s/pod.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/controller/chi/worker.go b/pkg/controller/chi/worker.go index f71a86a43..1c5d876de 100644 --- a/pkg/controller/chi/worker.go +++ b/pkg/controller/chi/worker.go @@ -17,7 +17,6 @@ package chi import ( "context" "errors" - "github.com/altinity/clickhouse-operator/pkg/model/k8s" "time" core "k8s.io/api/core/v1" @@ -47,6 +46,7 @@ import ( commonCreator "github.com/altinity/clickhouse-operator/pkg/model/common/creator" commonMacro "github.com/altinity/clickhouse-operator/pkg/model/common/macro" commonNormalizer "github.com/altinity/clickhouse-operator/pkg/model/common/normalizer" + "github.com/altinity/clickhouse-operator/pkg/model/k8s" "github.com/altinity/clickhouse-operator/pkg/model/managers" "github.com/altinity/clickhouse-operator/pkg/util" "github.com/altinity/queue" diff --git a/pkg/model/k8s/pod.go b/pkg/model/k8s/pod.go index 07f54bb74..a88b796c6 100644 --- a/pkg/model/k8s/pod.go +++ b/pkg/model/k8s/pod.go @@ -48,7 +48,7 @@ func PodHasCrushedContainers(pod *core.Pod) bool { func PodHasNotReadyContainers(pod *core.Pod) bool { for _, containerStatus := range pod.Status.ContainerStatuses { - if !containerStatus.Ready { + if !containerStatus.Ready { // Not ready return true } From 97d2fd3a0f0be12812d741c1e1d9d8a54165a87d Mon Sep 17 00:00:00 2001 From: alz Date: Fri, 24 Jan 2025 12:51:21 +0300 Subject: [PATCH 148/161] Cleanup metrics check --- tests/e2e/steps.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/e2e/steps.py b/tests/e2e/steps.py index 5866b44ea..6a53b927c 100644 --- a/tests/e2e/steps.py +++ b/tests/e2e/steps.py @@ -158,6 +158,7 @@ def check_metrics_monitoring( max_retries=7 ): with Then(f"metrics-exporter /metrics endpoint result should contain {expect_pattern} {expect_metric} {expect_labels}"): + expected_pattern_found = False for i in range(1, max_retries): url_cmd = util.make_http_get_request("127.0.0.1", port, "/metrics") out = kubectl.launch( @@ -169,20 +170,15 @@ def check_metrics_monitoring( if len(lines) > 0: metric = lines[0] print(metric) - expected_pattern_found = expect_labels in metric - else: - expected_pattern_found = False - break + assert expect_labels in metric, error(metric) + return if expect_pattern != "": rx = re.compile(expect_pattern, re.MULTILINE) matches = rx.findall(out) - expected_pattern_found = False if matches: expected_pattern_found = True - - if expected_pattern_found: break with Then("Not ready. 
Wait for " + str(i * 5) + " seconds"):

From 8dc1bfd6fe698d94c8bff7791f5c85b65202a3 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 24 Jan 2025 14:22:52 +0300
Subject: [PATCH 149/161] dev: add ctx to kube checkers

---
 pkg/controller/chi/worker.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pkg/controller/chi/worker.go b/pkg/controller/chi/worker.go
index 1c5d876de..fdd4ac3f9 100644
--- a/pkg/controller/chi/worker.go
+++ b/pkg/controller/chi/worker.go
@@ -201,28 +201,28 @@ func (w *worker) isPodCrushed(host *api.Host) bool {
 	return true
 }
 
-func (w *worker) isPodReady(host *api.Host) bool {
+func (w *worker) isPodReady(ctx context.Context, host *api.Host) bool {
 	if pod, err := w.c.kube.Pod().Get(host); err == nil {
 		return !k8s.PodHasNotReadyContainers(pod)
 	}
 	return false
 }
 
-func (w *worker) isPodStarted(host *api.Host) bool {
+func (w *worker) isPodStarted(ctx context.Context, host *api.Host) bool {
 	if pod, err := w.c.kube.Pod().Get(host); err == nil {
 		return !k8s.PodHasNotStartedContainers(pod)
 	}
 	return false
 }
 
-func (w *worker) isPodRunning(host *api.Host) bool {
+func (w *worker) isPodRunning(ctx context.Context, host *api.Host) bool {
 	if pod, err := w.c.kube.Pod().Get(host); err == nil {
 		return k8s.PodPhaseIsRunning(pod)
 	}
 	return false
 }
 
-func (w *worker) isPodOK(host *api.Host) bool {
+func (w *worker) isPodOK(ctx context.Context, host *api.Host) bool {
 	if pod, err := w.c.kube.Pod().Get(host); err == nil {
 		return k8s.IsPodOK(pod)
 	}
 	return false
 }

From 13c09bf8f6cf694b2d8844906f1dbb92d0b690ea Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 24 Jan 2025 14:23:16 +0300
Subject: [PATCH 150/161] dev: pass ctx

---
 pkg/controller/chi/worker-reconciler-chi.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/controller/chi/worker-reconciler-chi.go b/pkg/controller/chi/worker-reconciler-chi.go
index a12a496ec..70abd86f6 100644
--- a/pkg/controller/chi/worker-reconciler-chi.go
+++ b/pkg/controller/chi/worker-reconciler-chi.go
@@ -419,7 +419,7 @@ func (w *worker) hostSoftwareRestart(ctx context.Context, host *api.Host) error
 	}
 	w.a.V(1).M(host).F().Info("Host software is not crushed. Host: %s ", host.GetName())
 
-	if !w.isPodOK(host) {
+	if !w.isPodOK(ctx, host) {
 		w.a.V(1).M(host).F().Info("Host software restart abort 9.
Host: %s is not ok", host.GetName())
 		return fmt.Errorf("host is not ok")
 	}

From 757591a513aafb86f26108fee490228f18fdbd8a Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 24 Jan 2025 14:23:34 +0300
Subject: [PATCH 151/161] dev: streamline status checkers

---
 .../worker-wait-exclude-include-restart.go    | 26 +++----------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/pkg/controller/chi/worker-wait-exclude-include-restart.go b/pkg/controller/chi/worker-wait-exclude-include-restart.go
index e298d8e57..d58985c2f 100644
--- a/pkg/controller/chi/worker-wait-exclude-include-restart.go
+++ b/pkg/controller/chi/worker-wait-exclude-include-restart.go
@@ -360,39 +360,21 @@ func (w *worker) waitHostNoActiveQueries(ctx context.Context, host *api.Host) er
 func (w *worker) waitHostRestart(ctx context.Context, host *api.Host, start map[string]int) error {
 	return domain.PollHost(ctx, host, func(ctx context.Context, host *api.Host) bool {
 		cur, _ := w.c.kube.Pod().(interfaces.IKubePodEx).GetRestartCounters(host)
-		if !util.MapsAreTheSame(start, cur) {
-			return false
-		}
-		return true
+		return !util.MapsAreTheSame(start, cur)
 	})
 }
 
 // waitHostIsReady
 func (w *worker) waitHostIsReady(ctx context.Context, host *api.Host) error {
-	return domain.PollHost(ctx, host, func(ctx context.Context, host *api.Host) bool {
-		if w.isPodReady(host) {
-			return false
-		}
-		return true
-	})
+	return domain.PollHost(ctx, host, w.isPodReady)
 }
 
 // waitHostIsStarted
 func (w *worker) waitHostIsStarted(ctx context.Context, host *api.Host) error {
-	return domain.PollHost(ctx, host, func(ctx context.Context, host *api.Host) bool {
-		if w.isPodStarted(host) {
-			return false
-		}
-		return true
-	})
+	return domain.PollHost(ctx, host, w.isPodStarted)
 }
 
 // waitHostIsRunning
 func (w *worker) waitHostIsRunning(ctx context.Context, host *api.Host) error {
-	return domain.PollHost(ctx, host, func(ctx context.Context, host *api.Host) bool {
-		if w.isPodRunning(host) {
-			return false
-		}
-		return true
-	})
+	return domain.PollHost(ctx, host, w.isPodRunning)
 }

From cbf8fc74c14b810021b76bdd6bc40a1b63d695f0 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 24 Jan 2025 14:34:55 +0300
Subject: [PATCH 152/161] dev: introduce explicit pod restart check fn

---
 pkg/controller/chi/worker.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pkg/controller/chi/worker.go b/pkg/controller/chi/worker.go
index fdd4ac3f9..fd669b3ad 100644
--- a/pkg/controller/chi/worker.go
+++ b/pkg/controller/chi/worker.go
@@ -229,6 +229,11 @@ func (w *worker) isPodOK(ctx context.Context, host *api.Host) bool {
 	return false
 }
 
+func (w *worker) isPodRestarted(ctx context.Context, host *api.Host, start map[string]int) bool {
+	cur, _ := w.c.kube.Pod().(interfaces.IKubePodEx).GetRestartCounters(host)
+	return !util.MapsAreTheSame(start, cur)
+}
+
 // normalize
 func (w *worker) normalize(c *api.ClickHouseInstallation) *api.ClickHouseInstallation {
 	chi, err := w.normalizer.CreateTemplated(c, commonNormalizer.NewOptions())

From e347958e5f1c3e129a4280f658a5a6eea44177eb Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 24 Jan 2025 14:35:08 +0300
Subject: [PATCH 153/161] dev: streamline

---
 pkg/controller/chi/worker-wait-exclude-include-restart.go | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pkg/controller/chi/worker-wait-exclude-include-restart.go b/pkg/controller/chi/worker-wait-exclude-include-restart.go
index d58985c2f..bc94dfb66 100644
--- a/pkg/controller/chi/worker-wait-exclude-include-restart.go
+++ b/pkg/controller/chi/worker-wait-exclude-include-restart.go
@@ -22,7 +22,6 @@ import (
 	api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1"
 	"github.com/altinity/clickhouse-operator/pkg/chop"
 	"github.com/altinity/clickhouse-operator/pkg/controller/common/poller/domain"
-	"github.com/altinity/clickhouse-operator/pkg/interfaces"
 	"github.com/altinity/clickhouse-operator/pkg/util"
 )
 
@@ -359,8 +358,7 @@ func (w *worker) waitHostNoActiveQueries(ctx context.Context, host *api.Host) er
 // waitHostRestart
 func (w *worker) waitHostRestart(ctx context.Context, host *api.Host, start map[string]int) error {
 	return domain.PollHost(ctx, host, func(ctx context.Context, host *api.Host) bool {
-		cur, _ := w.c.kube.Pod().(interfaces.IKubePodEx).GetRestartCounters(host)
-		return !util.MapsAreTheSame(start, cur)
+		return w.isPodRestarted(ctx, host, start)
 	})
 }

From ed0ed33084f0b2bc77c74e8cfd7628cac2645118 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 24 Jan 2025 14:40:25 +0300
Subject: [PATCH 154/161] dev: introduce queries num checker fn

---
 pkg/controller/chi/worker.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pkg/controller/chi/worker.go b/pkg/controller/chi/worker.go
index fd669b3ad..b6b007dd3 100644
--- a/pkg/controller/chi/worker.go
+++ b/pkg/controller/chi/worker.go
@@ -234,6 +234,11 @@ func (w *worker) isPodRestarted(ctx context.Context, host *api.Host, start map[s
 	return !util.MapsAreTheSame(start, cur)
 }
 
+func (w *worker) doesHostHaveNoRunningQueries(ctx context.Context, host *api.Host) bool {
+	n, _ := w.ensureClusterSchemer(host).HostActiveQueriesNum(ctx, host)
+	return n <= 1
+}
+
 // normalize
 func (w *worker) normalize(c *api.ClickHouseInstallation) *api.ClickHouseInstallation {
 	chi, err := w.normalizer.CreateTemplated(c, commonNormalizer.NewOptions())

From 0b18bc6d661977fac3b93177d15698ef48903d9d Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 24 Jan 2025 14:40:38 +0300
Subject: [PATCH 155/161] dev: use queries num checker

---
 pkg/controller/chi/worker-wait-exclude-include-restart.go | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pkg/controller/chi/worker-wait-exclude-include-restart.go b/pkg/controller/chi/worker-wait-exclude-include-restart.go
index bc94dfb66..c108f6eca 100644
--- a/pkg/controller/chi/worker-wait-exclude-include-restart.go
+++ b/pkg/controller/chi/worker-wait-exclude-include-restart.go
@@ -349,10 +349,7 @@ func (w *worker) waitHostNotInCluster(ctx context.Context, host *api.Host) error
 
 // waitHostNoActiveQueries
 func (w *worker) waitHostNoActiveQueries(ctx context.Context, host *api.Host) error {
-	return domain.PollHost(ctx, host, func(ctx context.Context, host *api.Host) bool {
-		n, _ := w.ensureClusterSchemer(host).HostActiveQueriesNum(ctx, host)
-		return n <= 1
-	})
+	return domain.PollHost(ctx, host, w.doesHostHaveNoRunningQueries)
 }

From 22287ca521c501ea6589303f3a38ef37f3848056 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 24 Jan 2025 15:10:25 +0300
Subject: [PATCH 156/161] dev: extract worker helpers

---
 pkg/controller/chi/worker-status-helpers.go | 192 ++++++++++++++++++++
 pkg/controller/chi/worker.go                | 166 -----------------
 2 files changed, 192 insertions(+), 166 deletions(-)
 create mode 100644 pkg/controller/chi/worker-status-helpers.go

diff --git a/pkg/controller/chi/worker-status-helpers.go b/pkg/controller/chi/worker-status-helpers.go
new file mode 100644
index 000000000..4a7607e14
--- /dev/null
+++ b/pkg/controller/chi/worker-status-helpers.go
@@ -0,0 +1,192 @@
+// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package chi
+
+import (
+	"context"
+	"time"
+
+	log "github.com/altinity/clickhouse-operator/pkg/announcer"
+	api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1"
+	"github.com/altinity/clickhouse-operator/pkg/apis/deployment"
+	"github.com/altinity/clickhouse-operator/pkg/chop"
+	"github.com/altinity/clickhouse-operator/pkg/interfaces"
+	"github.com/altinity/clickhouse-operator/pkg/model/chi/config"
+	commonConfig "github.com/altinity/clickhouse-operator/pkg/model/common/config"
+	"github.com/altinity/clickhouse-operator/pkg/model/k8s"
+	"github.com/altinity/clickhouse-operator/pkg/util"
+)
+
+// timeToStart specifies the time during which the operator does not accept changes
+const timeToStart = 1 * time.Minute
+
+// isJustStarted checks whether the worker has just started
+func (w *worker) isJustStarted() bool {
+	return time.Since(w.start) < timeToStart
+}
+
+func (w *worker) isPodCrushed(host *api.Host) bool {
+	if pod, err := w.c.kube.Pod().Get(host); err == nil {
+		return k8s.PodHasCrushedContainers(pod)
+	}
+	return true
+}
+
+func (w *worker) isPodReady(ctx context.Context, host *api.Host) bool {
+	if pod, err := w.c.kube.Pod().Get(host); err == nil {
+		return !k8s.PodHasNotReadyContainers(pod)
+	}
+	return false
+}
+
+func (w *worker) isPodStarted(ctx context.Context, host *api.Host) bool {
+	if pod, err := w.c.kube.Pod().Get(host); err == nil {
+		return !k8s.PodHasNotStartedContainers(pod)
+	}
+	return false
+}
+
+func (w *worker) isPodRunning(ctx context.Context, host *api.Host) bool {
+	if pod, err := w.c.kube.Pod().Get(host); err == nil {
+		return k8s.PodPhaseIsRunning(pod)
+	}
+	return false
+}
+
+func (w *worker) isPodOK(ctx context.Context, host *api.Host) bool {
+	if pod, err := w.c.kube.Pod().Get(host); err == nil {
+		return k8s.IsPodOK(pod)
+	}
+	return false
+}
+
+func (w *worker) isPodRestarted(ctx context.Context, host *api.Host, start map[string]int) bool {
+	cur, _ := w.c.kube.Pod().(interfaces.IKubePodEx).GetRestartCounters(host)
+	return !util.MapsAreTheSame(start, cur)
+}
+
+func (w *worker) doesHostHaveNoRunningQueries(ctx context.Context, host *api.Host) bool {
+	n, _ := w.ensureClusterSchemer(host).HostActiveQueriesNum(ctx, host)
+	return n <= 1
+}
+
+// isCHIProcessedOnTheSameIP checks whether it is just a restart of the operator on the same IP
+func (w *worker) isCHIProcessedOnTheSameIP(chi *api.ClickHouseInstallation) bool {
+	ip, _ := chop.Get().ConfigManager.GetRuntimeParam(deployment.OPERATOR_POD_IP)
+	operatorIpIsTheSame := ip == chi.Status.GetCHOpIP()
+	log.V(1).Info("Operator IPs to process CHI: %s. Previous: %s Cur: %s", chi.Name, chi.Status.GetCHOpIP(), ip)
+
+	if !operatorIpIsTheSame {
+		// Operator has restarted on a different IP address.
+		// We may need to reconcile config files
+		log.V(1).Info("Operator IPs are different. Operator was restarted on another IP since previous reconcile of the CHI: %s", chi.Name)
+		return false
+	}
+
+	log.V(1).Info("Operator IPs are the same as on previous reconcile of the CHI: %s", chi.Name)
+	return w.isCleanRestart(chi)
+}
+
+// isCleanRestart checks whether it is just a restart of the operator and CHI has no changes since last processed
+func (w *worker) isCleanRestart(chi *api.ClickHouseInstallation) bool {
+	// Clean restart may be only in case operator has just recently started
+	if !w.isJustStarted() {
+		log.V(1).Info("Operator is not just started. May not be clean restart")
+		return false
+	}
+
+	log.V(1).Info("Operator just started. May be clean restart")
+
+	// Migration support
+	// Do we have a previously completed CHI?
+	// In case we do not - the CHI has either not completed yet or we are migrating from
+	// a version of the operator where there is no completed CHI at all
+	noCompletedCHI := !chi.HasAncestor()
+	// Having status completed and not having completed CHI suggests we are migrating operator version
+	statusIsCompleted := chi.Status.GetStatus() == api.StatusCompleted
+	if noCompletedCHI && statusIsCompleted {
+		// In case of a restart - assume that normalized is already completed
+		chi.SetAncestor(chi.GetTarget())
+	}
+
+	// Check whether anything has changed in CHI spec
+	// In case the generation is the same as already completed - it is clean restart
+	generationIsOk := false
+	// However, completed CHI still can be missing, for example, in newly requested CHI
+	if chi.HasAncestor() {
+		generationIsOk = chi.Generation == chi.GetAncestor().GetGeneration()
+		log.V(1).Info(
+			"CHI %s has ancestor. Generations. Prev: %d Cur: %d Generation is the same: %t",
+			chi.Name,
+			chi.GetAncestor().GetGeneration(),
+			chi.Generation,
+			generationIsOk,
+		)
+	} else {
+		log.V(1).Info("CHI %s has NO ancestor, meaning reconcile cycle was never completed.", chi.Name)
+	}
+
+	log.V(1).Info("Is CHI %s clean on operator restart: %t", chi.Name, generationIsOk)
+	return generationIsOk
+}
+
+// areUsableOldAndNew checks whether both old and new CHIs are usable
+func (w *worker) areUsableOldAndNew(old, new *api.ClickHouseInstallation) bool {
+	if old == nil {
+		return false
+	}
+	if new == nil {
+		return false
+	}
+	return true
+}
+
+// isAfterFinalizerInstalled checks whether we have just installed the finalizer
+func (w *worker) isAfterFinalizerInstalled(old, new *api.ClickHouseInstallation) bool {
+	if !w.areUsableOldAndNew(old, new) {
+		return false
+	}
+
+	finalizerIsInstalled := len(old.Finalizers) == 0 && len(new.Finalizers) > 0
+	return w.isGenerationTheSame(old, new) && finalizerIsInstalled
+}
+
+// isGenerationTheSame checks whether old and new CHI have the same generation
+func (w *worker) isGenerationTheSame(old, new *api.ClickHouseInstallation) bool {
+	if !w.areUsableOldAndNew(old, new) {
+		return false
+	}
+
+	return old.GetGeneration() == new.GetGeneration()
+}
+
+// getRemoteServersGeneratorOptions builds the base set of RemoteServersOptions
+func (w *worker) getRemoteServersGeneratorOptions() *commonConfig.HostSelector {
+	// Base model specifies to exclude:
+	// 1. all newly added hosts
+	// 2. all explicitly excluded hosts
+	return commonConfig.NewHostSelector().ExcludeReconcileAttributes(
+		api.NewHostReconcileAttributes().
+			SetAdd().
+			SetExclude(),
+	)
+}
+
+// options builds FilesGeneratorOptions
+func (w *worker) options() *config.FilesGeneratorOptions {
+	opts := w.getRemoteServersGeneratorOptions()
+	w.a.Info("RemoteServersOptions: %s", opts)
+	return config.NewFilesGeneratorOptions().SetRemoteServersOptions(opts)
+}
diff --git a/pkg/controller/chi/worker.go b/pkg/controller/chi/worker.go
index b6b007dd3..3a715e7ab 100644
--- a/pkg/controller/chi/worker.go
+++ b/pkg/controller/chi/worker.go
@@ -25,8 +25,6 @@ import (
 	log "github.com/altinity/clickhouse-operator/pkg/announcer"
 	api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1"
 	"github.com/altinity/clickhouse-operator/pkg/apis/common/types"
-	"github.com/altinity/clickhouse-operator/pkg/apis/deployment"
-	"github.com/altinity/clickhouse-operator/pkg/chop"
 	"github.com/altinity/clickhouse-operator/pkg/controller/chi/metrics"
 	"github.com/altinity/clickhouse-operator/pkg/controller/common"
 	a "github.com/altinity/clickhouse-operator/pkg/controller/common/announcer"
@@ -42,11 +40,9 @@ import (
 	"github.com/altinity/clickhouse-operator/pkg/model/chi/schemer"
 	"github.com/altinity/clickhouse-operator/pkg/model/chi/tags/labeler"
 	"github.com/altinity/clickhouse-operator/pkg/model/common/action_plan"
-	commonConfig "github.com/altinity/clickhouse-operator/pkg/model/common/config"
 	commonCreator "github.com/altinity/clickhouse-operator/pkg/model/common/creator"
 	commonMacro "github.com/altinity/clickhouse-operator/pkg/model/common/macro"
 	commonNormalizer "github.com/altinity/clickhouse-operator/pkg/model/common/normalizer"
-	"github.com/altinity/clickhouse-operator/pkg/model/k8s"
 	"github.com/altinity/clickhouse-operator/pkg/model/managers"
 	"github.com/altinity/clickhouse-operator/pkg/util"
 	"github.com/altinity/queue"
@@ -152,14 +148,6 @@ func (w *worker) newTask(new, old *api.ClickHouseInstallation) {
 	)
 }
 
-// timeToStart specifies time that operator does not accept changes
-const timeToStart = 1 * time.Minute
-
-// isJustStarted checks whether worked just started
-func (w *worker) isJustStarted() bool {
-	return time.Since(w.start) < timeToStart
-}
-
 // shouldForceRestartHost checks whether cluster requires hosts restart
 func (w *worker) shouldForceRestartHost(host *api.Host) bool {
 	// RollingUpdate purpose is to always shut the host down.
@@ -194,51 +182,6 @@ func (w *worker) shouldForceRestartHost(host *api.Host) bool { return false } -func (w *worker) isPodCrushed(host *api.Host) bool { - if pod, err := w.c.kube.Pod().Get(host); err == nil { - return k8s.PodHasCrushedContainers(pod) - } - return true -} - -func (w *worker) isPodReady(ctx context.Context, host *api.Host) bool { - if pod, err := w.c.kube.Pod().Get(host); err == nil { - return !k8s.PodHasNotReadyContainers(pod) - } - return false -} - -func (w *worker) isPodStarted(ctx context.Context, host *api.Host) bool { - if pod, err := w.c.kube.Pod().Get(host); err == nil { - return !k8s.PodHasNotStartedContainers(pod) - } - return false -} - -func (w *worker) isPodRunning(ctx context.Context, host *api.Host) bool { - if pod, err := w.c.kube.Pod().Get(host); err == nil { - return k8s.PodPhaseIsRunning(pod) - } - return false -} - -func (w *worker) isPodOK(ctx context.Context, host *api.Host) bool { - if pod, err := w.c.kube.Pod().Get(host); err == nil { - return k8s.IsPodOK(pod) - } - return false -} - -func (w *worker) isPodRestarted(ctx context.Context, host *api.Host, start map[string]int) bool { - cur, _ := w.c.kube.Pod().(interfaces.IKubePodEx).GetRestartCounters(host) - return !util.MapsAreTheSame(start, cur) -} - -func (w *worker) doesHostHaveNoRunningQueries(ctx context.Context, host *api.Host) bool { - n, _ := w.ensureClusterSchemer(host).HostActiveQueriesNum(ctx, host) - return n <= 1 -} - // normalize func (w *worker) normalize(c *api.ClickHouseInstallation) *api.ClickHouseInstallation { chi, err := w.normalizer.CreateTemplated(c, commonNormalizer.NewOptions()) @@ -396,96 +339,6 @@ func (w *worker) updateCHI(ctx context.Context, old, new *api.ClickHouseInstalla return w.reconcileCR(ctx, old, new) } -// isCHIProcessedOnTheSameIP checks whether it is just a restart of the operator on the same IP -func (w *worker) isCHIProcessedOnTheSameIP(chi *api.ClickHouseInstallation) bool { - ip, _ := chop.Get().ConfigManager.GetRuntimeParam(deployment.OPERATOR_POD_IP) - operatorIpIsTheSame := ip == chi.Status.GetCHOpIP() - log.V(1).Info("Operator IPs to process CHI: %s. Previous: %s Cur: %s", chi.Name, chi.Status.GetCHOpIP(), ip) - - if !operatorIpIsTheSame { - // Operator has restarted on the different IP address. - // We may need to reconcile config files - log.V(1).Info("Operator IPs are different. Operator was restarted on another IP since previous reconcile of the CHI: %s", chi.Name) - return false - } - - log.V(1).Info("Operator IPs are the same as on previous reconcile of the CHI: %s", chi.Name) - return w.isCleanRestart(chi) -} - -// isCleanRestart checks whether it is just a restart of the operator and CHI has no changes since last processed -func (w *worker) isCleanRestart(chi *api.ClickHouseInstallation) bool { - // Clean restart may be only in case operator has just recently started - if !w.isJustStarted() { - log.V(1).Info("Operator is not just started. May not be clean restart") - return false - } - - log.V(1).Info("Operator just started. May be clean restart") - - // Migration support - // Do we have have previously completed CHI? 
- // In case no - this means that CHI has either not completed or we are migrating from - // such a version of the operator, where there is no completed CHI at all - noCompletedCHI := !chi.HasAncestor() - // Having status completed and not having completed CHI suggests we are migrating operator version - statusIsCompleted := chi.Status.GetStatus() == api.StatusCompleted - if noCompletedCHI && statusIsCompleted { - // In case of a restart - assume that normalized is already completed - chi.SetAncestor(chi.GetTarget()) - } - - // Check whether anything has changed in CHI spec - // In case the generation is the same as already completed - it is clean restart - generationIsOk := false - // However, completed CHI still can be missing, for example, in newly requested CHI - if chi.HasAncestor() { - generationIsOk = chi.Generation == chi.GetAncestor().GetGeneration() - log.V(1).Info( - "CHI %s has ancestor. Generations. Prev: %d Cur: %d Generation is the same: %t", - chi.Name, - chi.GetAncestor().GetGeneration(), - chi.Generation, - generationIsOk, - ) - } else { - log.V(1).Info("CHI %s has NO ancestor, meaning reconcile cycle was never completed.", chi.Name) - } - - log.V(1).Info("Is CHI %s clean on operator restart: %t", chi.Name, generationIsOk) - return generationIsOk -} - -// areUsableOldAndNew checks whether there are old and new usable -func (w *worker) areUsableOldAndNew(old, new *api.ClickHouseInstallation) bool { - if old == nil { - return false - } - if new == nil { - return false - } - return true -} - -// isAfterFinalizerInstalled checks whether we are just installed finalizer -func (w *worker) isAfterFinalizerInstalled(old, new *api.ClickHouseInstallation) bool { - if !w.areUsableOldAndNew(old, new) { - return false - } - - finalizerIsInstalled := len(old.Finalizers) == 0 && len(new.Finalizers) > 0 - return w.isGenerationTheSame(old, new) && finalizerIsInstalled -} - -// isGenerationTheSame checks whether old ans new CHI have the same generation -func (w *worker) isGenerationTheSame(old, new *api.ClickHouseInstallation) bool { - if !w.areUsableOldAndNew(old, new) { - return false - } - - return old.GetGeneration() == new.GetGeneration() -} - // excludeStoppedCHIFromMonitoring excludes stopped CHI from monitoring func (w *worker) excludeStoppedCHIFromMonitoring(chi *api.ClickHouseInstallation) { if !chi.IsStopped() { @@ -729,25 +582,6 @@ func (w *worker) walkHosts(ctx context.Context, chi *api.ClickHouseInstallation, }) } -// getRemoteServersGeneratorOptions build base set of RemoteServersOptions -func (w *worker) getRemoteServersGeneratorOptions() *commonConfig.HostSelector { - // Base model specifies to exclude: - // 1. all newly added hosts - // 2. all explicitly excluded hosts - return commonConfig.NewHostSelector().ExcludeReconcileAttributes( - api.NewHostReconcileAttributes(). - SetAdd(). 
-			SetExclude(),
-	)
-}
-
-// options build FilesGeneratorOptionsClickHouse
-func (w *worker) options() *config.FilesGeneratorOptions {
-	opts := w.getRemoteServersGeneratorOptions()
-	w.a.Info("RemoteServersOptions: %s", opts)
-	return config.NewFilesGeneratorOptions().SetRemoteServersOptions(opts)
-}
-
 // createCRFromObjectMeta
 func (w *worker) createCRFromObjectMeta(meta meta.Object, isCHI bool, options *commonNormalizer.Options) (*api.ClickHouseInstallation, error) {
 	w.a.V(3).M(meta).S().P()

From 6b3bf710d1175a29a483b79ce9318d2d160e1e75 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 24 Jan 2025 16:04:00 +0300
Subject: [PATCH 157/161] dev: add pod status utils

---
 pkg/model/k8s/pod.go | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/pkg/model/k8s/pod.go b/pkg/model/k8s/pod.go
index a88b796c6..768623da8 100644
--- a/pkg/model/k8s/pod.go
+++ b/pkg/model/k8s/pod.go
@@ -57,19 +57,21 @@ func PodHasNotReadyContainers(pod *core.Pod) bool {
 	return false
 }
 
-func PodHasNotStartedContainers(pod *core.Pod) bool {
-	res := true
+func PodHasAllContainersStarted(pod *core.Pod) bool {
+	allStarted := true
 	for _, containerStatus := range pod.Status.ContainerStatuses {
-		if containerStatus.Started != nil {
-			if *containerStatus.Started {
-				// Started
-				continue
-			}
+		if (containerStatus.Started != nil) && (*containerStatus.Started) {
+			// Current container is started, no change to the overall status
+		} else {
+			// Current container is NOT started
+			allStarted = false
 		}
-		// Not started
-		res = false
 	}
-	return res
+	return allStarted
+}
+
+func PodHasNotStartedContainers(pod *core.Pod) bool {
+	return !PodHasAllContainersStarted(pod)
 }
 
 func PodPhaseIsRunning(pod *core.Pod) bool {

From f8d0b7f81724d68895db0c70b7566e397493e902 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 24 Jan 2025 16:04:25 +0300
Subject: [PATCH 158/161] dev: streamline status check

---
 pkg/controller/chi/worker-status-helpers.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/controller/chi/worker-status-helpers.go b/pkg/controller/chi/worker-status-helpers.go
index 4a7607e14..7f0a24aa6 100644
--- a/pkg/controller/chi/worker-status-helpers.go
+++ b/pkg/controller/chi/worker-status-helpers.go
@@ -53,7 +53,7 @@ func (w *worker) isPodReady(ctx context.Context, host *api.Host) bool {
 
 func (w *worker) isPodStarted(ctx context.Context, host *api.Host) bool {
 	if pod, err := w.c.kube.Pod().Get(host); err == nil {
-		return !k8s.PodHasNotStartedContainers(pod)
+		return k8s.PodHasAllContainersStarted(pod)
 	}
 	return false
 }

From c34d82a36110664ea5779205ae7e1806b9227a17 Mon Sep 17 00:00:00 2001
From: Vladislav Klimenko
Date: Fri, 24 Jan 2025 16:23:17 +0300
Subject: [PATCH 159/161] dev: comment

---
 pkg/util/runtime/runtime.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/util/runtime/runtime.go b/pkg/util/runtime/runtime.go
index abc2857c0..0871a0d60 100644
--- a/pkg/util/runtime/runtime.go
+++ b/pkg/util/runtime/runtime.go
@@ -48,7 +48,7 @@ func Caller(skip string) (string, int, string) {
 	return "", 0, ""
 }
 
-// FunctionName returns name of thee calling function
+// FunctionName returns name of the calling function
 func FunctionName(fn interface{}) string {
 	return runtime.FuncForPC(reflect.ValueOf(fn).Pointer()).Name()
 }

From 63b80a1a760ff2d14c9e70a5c16d8806e7b98e05 Mon Sep 17 00:00:00 2001
From: Eugene Klimov
Date: Fri, 24 Jan 2025 23:12:00 +0500
Subject: [PATCH 160/161] add example for ClickHouseInstallationTemplate and
 ClickHouseInstallation with added additional volume with emptyDir to use
 local disk (#1622)
---
 ...tent-volume-09-with-template-emptydir.yaml | 116 ++++++++++++++
 1 file changed, 116 insertions(+)
 create mode 100644 docs/chi-examples/03-persistent-volume-09-with-template-emptydir.yaml

diff --git a/docs/chi-examples/03-persistent-volume-09-with-template-emptydir.yaml b/docs/chi-examples/03-persistent-volume-09-with-template-emptydir.yaml
new file mode 100644
index 000000000..8d1362b57
--- /dev/null
+++ b/docs/chi-examples/03-persistent-volume-09-with-template-emptydir.yaml
@@ -0,0 +1,116 @@
+---
+apiVersion: clickhouse.altinity.com/v1
+kind: ClickHouseInstallationTemplate
+metadata:
+  name: instance-store-emptydir-template
+spec:
+  templating:
+    policy: auto
+    chiSelector:
+      clickhouse.altinity.com/chi: s3-cache-via-template
+
+  templates:
+    podTemplates:
+      - name: pod-template
+        spec:
+          volumes:
+            - name: clickhouse-nvme-cache
+              emptyDir: {}
+
+          containers:
+            - name: clickhouse-pod
+              # CHIT has higher priority than CHI and overrides the whole volumeMounts section
+              volumeMounts:
+                - name: data-storage-vc-template-1
+                  mountPath: /var/lib/clickhouse
+                - name: clickhouse-nvme-cache
+                  mountPath: /var/lib/clickhouse/disks/s3_cache/
+
+---
+apiVersion: "clickhouse.altinity.com/v1"
+kind: "ClickHouseInstallation"
+metadata:
+  name: "s3-cache-via-template"
+  labels:
+    clickhouse.altinity.com/chi: s3-cache-via-template
+spec:
+  configuration:
+    clusters:
+      - name: "cluster"
+        templates:
+          podTemplate: pod-template
+        layout:
+          shardsCount: 1
+          replicasCount: 1
+    files:
+      config.d/storage_configuration.xml: |
+        <clickhouse>
+          <storage_configuration>
+            <disks>
+              <s3_disk>
+                <type>s3</type>
+                <endpoint>https://sample-bucket.s3.amazonaws.com/s3_disk/{replica}</endpoint>
+                <access_key_id>your_access_key_id</access_key_id>
+                <secret_access_key>your_secret_access_key</secret_access_key>
+                <region>us-east-2</region>
+                <metadata_path>/var/lib/clickhouse/disks/s3_disk/</metadata_path>
+              </s3_disk>
+              <s3_cache>
+                <type>cache</type>
+                <disk>s3_disk</disk>
+                <path>/var/lib/clickhouse/disks/s3_cache/</path>
+                <max_size>10Gi</max_size>
+              </s3_cache>
+            </disks>
+            <policies>
+              <s3_disk>
+                <volumes>
+                  <main>
+                    <disk>s3_disk</disk>
+                  </main>
+                </volumes>
+              </s3_disk>
+              <s3_cache>
+                <volumes>
+                  <main>
+                    <disk>s3_cache</disk>
+                  </main>
+                </volumes>
+              </s3_cache>
+            </policies>
+          </storage_configuration>
+          <merge_tree>
+            <storage_policy>default</storage_policy>
+          </merge_tree>
+          <profiles>
+            <default>
+              <storage_policy>s3_cache</storage_policy>
+            </default>
+          </profiles>
+        </clickhouse>
+
+ templates: + podTemplates: + - name: pod-template + spec: + containers: + - name: clickhouse + image: clickhouse/clickhouse-server:latest + imagePullPolicy: IfNotPresent + volumeMounts: + - name: data-storage-vc-template-1 + mountPath: /var/lib/clickhouse + command: + - clickhouse-server + - --config-file=/etc/clickhouse-server/config.xml + + volumeClaimTemplates: + - name: data-storage-vc-template-1 + spec: + # storageClassName: standard + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi From c7e131559a99119b8aca915d7842a0053eb2bdbd Mon Sep 17 00:00:00 2001 From: Vladislav Klimenko Date: Mon, 27 Jan 2025 12:22:13 +0300 Subject: [PATCH 161/161] dev: api naming --- pkg/chop/chop.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/chop/chop.go b/pkg/chop/chop.go index ccf5a9f99..d9a9c771c 100644 --- a/pkg/chop/chop.go +++ b/pkg/chop/chop.go @@ -21,7 +21,7 @@ import ( kube "k8s.io/client-go/kubernetes" log "github.com/altinity/clickhouse-operator/pkg/announcer" - "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" + api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" chopclientset "github.com/altinity/clickhouse-operator/pkg/client/clientset/versioned" ) @@ -63,7 +63,7 @@ func (c *CHOp) Init() error { } // Config returns operator config -func (c *CHOp) Config() *v1.OperatorConfig { +func (c *CHOp) Config() *api.OperatorConfig { if c == nil { return nil }
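
Taken together, patches 149-158 converge every host status checker on the
signature func(ctx context.Context, host *api.Host) bool, which is exactly the
predicate shape that domain.PollHost consumes, so the wait* helpers can pass
method values such as w.isPodReady directly instead of wrapping them in one-off
closures. The following is a minimal, self-contained Go sketch of that pattern;
pollHost, host, worker, and the poll interval are illustrative stand-ins and
assumptions, not the operator's actual implementation.

// Illustrative sketch only: a simplified poller in the spirit of
// domain.PollHost, showing why the checkers share the func(ctx, host) bool shape.
package main

import (
	"context"
	"fmt"
	"time"
)

// host is a stand-in for api.Host.
type host struct{ name string }

// checker matches the predicate shape the refactored helpers converge on.
type checker func(ctx context.Context, h *host) bool

// pollHost retries fn until it returns true or ctx expires.
func pollHost(ctx context.Context, h *host, fn checker) error {
	tick := time.NewTicker(100 * time.Millisecond)
	defer tick.Stop()
	for {
		if fn(ctx, h) {
			return nil
		}
		select {
		case <-ctx.Done():
			return fmt.Errorf("poll %s: %w", h.name, ctx.Err())
		case <-tick.C:
		}
	}
}

// worker is a stand-in for the operator's worker type.
type worker struct{ readyAfter time.Time }

// isPodReady mimics the shape of the worker's checker methods.
func (w *worker) isPodReady(_ context.Context, _ *host) bool {
	return time.Now().After(w.readyAfter)
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()
	w := &worker{readyAfter: time.Now().Add(300 * time.Millisecond)}
	// The method value w.isPodReady is itself a valid checker;
	// no wrapping closure is needed, as in waitHostIsReady above.
	fmt.Println(pollHost(ctx, &host{name: "chi-demo-0-0-0"}, w.isPodReady))
}

Inverting PodHasNotStartedContainers into PodHasAllContainersStarted (patch 157)
serves the same goal: the predicate reads positively at the call site, so
isPodStarted can return it without a double negative.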