[Fix] : cluster scaling #704

Merged: 6 commits merged on Nov 20, 2023
1 change: 1 addition & 0 deletions .github/workflows/e2e.yaml
@@ -18,6 +18,7 @@ jobs:
- ./tests/e2e/v1beta2/setup
- ./tests/e2e/v1beta2/teardown
- ./tests/e2e/v1beta2/ignore-annots
- ./tests/e2e/v1beta2/scaling

steps:
- name: Checkout code
84 changes: 46 additions & 38 deletions controllers/rediscluster_controller.go
@@ -75,27 +75,36 @@

// Check if the cluster is downscaled
if leaderReplicas < instance.Status.ReadyLeaderReplicas {

// Imp if the last index of leader sts is not leader make it then
// check whether the redis is leader or not ?
// if not true then make it leader pod

if !(k8sutils.VerifyLeaderPod(ctx, r.K8sClient, r.Log, instance)) {
// lastLeaderPod is slaving right now Make it the master Pod
// We have to bring a manual failover here to make it a leaderPod
// clusterFailover should also include the clusterReplicate since we have to map the followers to new leader
k8sutils.ClusterFailover(ctx, r.K8sClient, r.Log, instance)
reqLogger.Info("Redis cluster is downscaling...", "Ready.ReadyLeaderReplicas", instance.Status.ReadyLeaderReplicas, "Expected.ReadyLeaderReplicas", leaderReplicas)

// loop count times to remove the latest leader/follower pod
count := instance.Status.ReadyLeaderReplicas - leaderReplicas
for i := int32(0); i < count; i++ {
reqLogger.Info("Redis cluster is downscaling", "The times of loop", i)

// Imp if the last index of leader sts is not leader make it then
// check whether the redis is leader or not ?
// if not true then make it leader pod
if !(k8sutils.VerifyLeaderPod(ctx, r.K8sClient, r.Log, instance)) {
// lastLeaderPod is slaving right now Make it the master Pod
// We have to bring a manual failover here to make it a leaderPod
// clusterFailover should also include the clusterReplicate since we have to map the followers to new leader
k8sutils.ClusterFailover(ctx, r.K8sClient, r.Log, instance)
}

// Step 1 Remove the Follower Node
k8sutils.RemoveRedisFollowerNodesFromCluster(ctx, r.K8sClient, r.Log, instance)
// Step 2 Reshard the Cluster
k8sutils.ReshardRedisCluster(r.K8sClient, r.Log, instance, true)

}

// Step 1 Rehard the Cluster
k8sutils.ReshardRedisCluster(r.K8sClient, r.Log, instance)
// Step 2 Remove the Follower Node
k8sutils.RemoveRedisFollowerNodesFromCluster(ctx, r.K8sClient, r.Log, instance)
// Step 3 Remove the Leader Node
k8sutils.RemoveRedisNodeFromCluster(ctx, r.K8sClient, r.Log, instance)
// Step 4 Rebalance the cluster
reqLogger.Info("Redis cluster is downscaled... Rebalancing the cluster")
// Step 3 Rebalance the cluster

k8sutils.RebalanceRedisCluster(r.K8sClient, r.Log, instance)
return ctrl.Result{RequeueAfter: time.Second * 100}, nil
reqLogger.Info("Redis cluster is downscaled... Rebalancing the cluster is done")
err = k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterReady, status.ReadyClusterReason, leaderReplicas, leaderReplicas)
if err != nil {
return ctrl.Result{RequeueAfter: time.Second * 10}, err
}
return ctrl.Result{RequeueAfter: time.Second * 60}, nil

}

// Mark the cluster status as initializing if there are no leader or follower nodes
@@ -130,7 +139,7 @@
return ctrl.Result{}, err
}

if int32(redisLeaderInfo.Status.ReadyReplicas) == leaderReplicas {
if redisLeaderInfo.Status.ReadyReplicas == leaderReplicas {


// Mark the cluster status as initializing if there are no follower nodes
if instance.Status.ReadyLeaderReplicas == 0 && instance.Status.ReadyFollowerReplicas == 0 {
@@ -166,26 +175,24 @@

if leaderReplicas == 0 {
reqLogger.Info("Redis leaders Cannot be 0", "Ready.Replicas", strconv.Itoa(int(redisLeaderInfo.Status.ReadyReplicas)), "Expected.Replicas", leaderReplicas)
return ctrl.Result{RequeueAfter: time.Second * 120}, nil
return ctrl.Result{RequeueAfter: time.Second * 60}, nil

}

if !(redisLeaderInfo.Status.ReadyReplicas == leaderReplicas && redisFollowerInfo.Status.ReadyReplicas == followerReplicas) {
reqLogger.Info("Redis leader and follower nodes are not ready yet", "Ready.Replicas", strconv.Itoa(int(redisLeaderInfo.Status.ReadyReplicas)), "Expected.Replicas", leaderReplicas)
return ctrl.Result{RequeueAfter: time.Second * 120}, nil
return ctrl.Result{RequeueAfter: time.Second * 60}, nil

}

// Mark the cluster status as bootstrapping if all the leader and follower nodes are ready
if int32(redisLeaderInfo.Status.ReadyReplicas) == leaderReplicas && int32(redisFollowerInfo.Status.ReadyReplicas) == followerReplicas {
if instance.Status.ReadyLeaderReplicas == leaderReplicas && instance.Status.ReadyFollowerReplicas == 0 {
err = k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterBootstrap, status.BootstrapClusterReason, leaderReplicas, followerReplicas)
if err != nil {
return ctrl.Result{RequeueAfter: time.Second * 10}, err
}
if !(instance.Status.ReadyLeaderReplicas == leaderReplicas && instance.Status.ReadyFollowerReplicas == followerReplicas) {
err = k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterBootstrap, status.BootstrapClusterReason, leaderReplicas, followerReplicas)
if err != nil {
return ctrl.Result{RequeueAfter: time.Second * 10}, err

}
}

reqLogger.Info("Creating redis cluster by executing cluster creation commands", "Leaders.Ready", strconv.Itoa(int(redisLeaderInfo.Status.ReadyReplicas)), "Followers.Ready", strconv.Itoa(int(redisFollowerInfo.Status.ReadyReplicas)))
if k8sutils.CheckRedisNodeCount(ctx, r.K8sClient, r.Log, instance, "") != totalReplicas {
if nc := k8sutils.CheckRedisNodeCount(ctx, r.K8sClient, r.Log, instance, ""); nc != totalReplicas {

leaderCount := k8sutils.CheckRedisNodeCount(ctx, r.K8sClient, r.Log, instance, "leader")
if leaderCount != leaderReplicas {
reqLogger.Info("Not all leader are part of the cluster...", "Leaders.Count", leaderCount, "Instance.Size", leaderReplicas)
@@ -208,16 +215,17 @@
reqLogger.Info("no follower/replicas configured, skipping replication configuration", "Leaders.Count", leaderCount, "Leader.Size", leaderReplicas, "Follower.Replicas", followerReplicas)
}
}
} else {
reqLogger.Info("Redis leader count is desired")
if int(totalReplicas) > 1 && k8sutils.CheckRedisClusterState(ctx, r.K8sClient, r.Log, instance) >= int(totalReplicas)-1 {
reqLogger.Info("Redis leader is not desired, executing failover operation")
err = k8sutils.ExecuteFailoverOperation(ctx, r.K8sClient, r.Log, instance)
if err != nil {
return ctrl.Result{RequeueAfter: time.Second * 10}, err
}
reqLogger.Info("Redis cluster count is not desired", "Current.Count", nc, "Desired.Count", totalReplicas)
return ctrl.Result{RequeueAfter: time.Second * 60}, nil

}

reqLogger.Info("Redis cluster count is desired")
if int(totalReplicas) > 1 && k8sutils.CheckRedisClusterState(ctx, r.K8sClient, r.Log, instance) >= int(totalReplicas)-1 {
reqLogger.Info("Redis leader is not desired, executing failover operation")
err = k8sutils.ExecuteFailoverOperation(ctx, r.K8sClient, r.Log, instance)
if err != nil {
return ctrl.Result{RequeueAfter: time.Second * 10}, err

}
return ctrl.Result{RequeueAfter: time.Second * 120}, nil
}

// Check If there is No Empty Master Node
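Taken together, the controller hunks above replace the one-shot downscale with a per-leader loop: each iteration fails over the last leader if it is currently a replica, removes its follower, and reshards its slots away before the node is deleted; a single rebalance runs after the loop. The sketch below condenses that flow. It is illustrative only: the helper name downscaleLeaders, the import paths, and the trimmed error handling are assumptions, not code from this PR.

package controllers

import (
	"context"
	"time"

	"github.com/go-logr/logr"
	"k8s.io/client-go/kubernetes"
	ctrl "sigs.k8s.io/controller-runtime"

	redisv1beta2 "github.com/OT-CONTAINER-KIT/redis-operator/api/v1beta2"
	"github.com/OT-CONTAINER-KIT/redis-operator/k8sutils"
)

// downscaleLeaders drops one leader (and its follower) per iteration until the
// ready leader count matches the desired replica count, then rebalances slots.
func downscaleLeaders(ctx context.Context, client kubernetes.Interface, log logr.Logger, instance *redisv1beta2.RedisCluster, leaderReplicas int32) (ctrl.Result, error) {
	for i := int32(0); i < instance.Status.ReadyLeaderReplicas-leaderReplicas; i++ {
		// If the highest-index leader pod is currently a replica, fail it over
		// so it owns slots before those slots are moved away.
		if !k8sutils.VerifyLeaderPod(ctx, client, log, instance) {
			k8sutils.ClusterFailover(ctx, client, log, instance)
		}
		// Step 1: remove the follower that belongs to the leader being dropped.
		k8sutils.RemoveRedisFollowerNodesFromCluster(ctx, client, log, instance)
		// Step 2: move its slots to leader-0; remove=true also deletes the
		// now-empty node from the cluster.
		k8sutils.ReshardRedisCluster(client, log, instance, true)
	}
	// Step 3: spread the remaining slots evenly across the surviving leaders.
	k8sutils.RebalanceRedisCluster(client, log, instance)
	return ctrl.Result{RequeueAfter: time.Second * 60}, nil
}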
22 changes: 14 additions & 8 deletions k8sutils/cluster-scaling.go
@@ -11,8 +11,10 @@
"k8s.io/client-go/kubernetes"
)

// Reshard the redis Cluster
func ReshardRedisCluster(client kubernetes.Interface, logger logr.Logger, cr *redisv1beta2.RedisCluster) {
// ReshardRedisCluster transfers the slots from the last node to the first node.
//
// NOTE: when all slots have been transferred, the node becomes a slave of the first master node.
func ReshardRedisCluster(client kubernetes.Interface, logger logr.Logger, cr *redisv1beta2.RedisCluster, remove bool) {

ctx := context.TODO()
var cmd []string
currentRedisCount := CheckRedisNodeCount(ctx, client, logger, cr, "leader")
@@ -72,6 +74,10 @@
return
}
executeCommand(client, logger, cr, cmd, cr.ObjectMeta.Name+"-leader-0")

if remove {
RemoveRedisNodeFromCluster(ctx, client, logger, cr, removePOD)
}

}

func getRedisClusterSlots(ctx context.Context, client kubernetes.Interface, logger logr.Logger, cr *redisv1beta2.RedisCluster, nodeID string) string {
@@ -336,18 +342,18 @@
}

// Remove redis cluster node would remove the last node from the existing redis cluster using redis-cli
func RemoveRedisNodeFromCluster(ctx context.Context, client kubernetes.Interface, logger logr.Logger, cr *redisv1beta2.RedisCluster) {
func RemoveRedisNodeFromCluster(ctx context.Context, client kubernetes.Interface, logger logr.Logger, cr *redisv1beta2.RedisCluster, removePod RedisDetails) {

var cmd []string
currentRedisCount := CheckRedisNodeCount(ctx, client, logger, cr, "leader")
//currentRedisCount := CheckRedisNodeCount(ctx, client, logger, cr, "leader")


existingPod := RedisDetails{
PodName: cr.ObjectMeta.Name + "-leader-0",
Namespace: cr.Namespace,
}
removePod := RedisDetails{
PodName: cr.ObjectMeta.Name + "-leader-" + strconv.Itoa(int(currentRedisCount)-1),
Namespace: cr.Namespace,
}
//removePod := RedisDetails{
// PodName: cr.ObjectMeta.Name + "-leader-" + strconv.Itoa(int(currentRedisCount)-1),
// Namespace: cr.Namespace,
//}


cmd = []string{"redis-cli", "--cluster", "del-node"}

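With the signature changes above, ReshardRedisCluster can optionally delete the drained node itself (remove=true), and RemoveRedisNodeFromCluster now expects the caller to name the pod to delete instead of deriving the last leader from the current node count. A minimal caller sketch under those assumptions; the concrete pod name below is hypothetical:

// remove=true: once every slot has been moved to <name>-leader-0, the
// drained node is deleted from the cluster (redis-cli --cluster del-node).
k8sutils.ReshardRedisCluster(client, logger, cr, true)

// Alternatively, delete an already-empty node explicitly by passing its
// pod details (hypothetical name for a 3 -> 2 leader downscale).
removePod := k8sutils.RedisDetails{
	PodName:   cr.ObjectMeta.Name + "-leader-2",
	Namespace: cr.Namespace,
}
k8sutils.RemoveRedisNodeFromCluster(ctx, client, logger, cr, removePod)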
6 changes: 3 additions & 3 deletions k8sutils/redis.go
@@ -406,20 +406,20 @@ func getContainerID(client kubernetes.Interface, logger logr.Logger, cr *redisv1
return -1, nil
}

logger.Info("Pod info retrieved successfully", "Pod Name", podName, "Namespace", cr.Namespace)
logger.V(1).Info("Pod info retrieved successfully", "Pod Name", podName, "Namespace", cr.Namespace)

targetContainer := -1
for containerID, tr := range pod.Spec.Containers {
logger.V(1).Info("Inspecting container", "Pod Name", podName, "Container ID", containerID, "Container Name", tr.Name)
if tr.Name == cr.ObjectMeta.Name+"-leader" {
targetContainer = containerID
logger.Info("Leader container found", "Container ID", containerID, "Container Name", tr.Name)
logger.V(1).Info("Leader container found", "Container ID", containerID, "Container Name", tr.Name)
break
}
}

if targetContainer == -1 {
logger.Info("Leader container not found in pod", "Pod Name", podName)
logger.V(1).Info("Leader container not found in pod", "Pod Name", podName)
return -1, nil
}

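The redis.go changes only lower log verbosity: with logr, V(1) messages are emitted solely when the operator runs at a higher log level, so the per-pod container lookups stop flooding the default output. A small sketch of that behaviour, assuming the controller-runtime zap logger (the operator's actual logger wiring may differ):

package main

import (
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/log/zap"
)

func main() {
	// Development mode enables the debug level, which maps to logr V(1).
	ctrl.SetLogger(zap.New(zap.UseDevMode(true)))

	log := ctrl.Log.WithName("example")
	log.Info("always printed at the default verbosity")
	log.V(1).Info("printed only when verbosity is raised")
}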
3 changes: 2 additions & 1 deletion tests/_config/kuttl-test.yaml
@@ -3,10 +3,11 @@ kind: TestSuite
startKIND: false
kindConfig: "./kind-config.yaml"
parallel: 1
timeout: 300
timeout: 1200
testDirs:
- tests/e2e/v1beta2/setup
- tests/e2e/v1beta2/teardown
- tests/e2e/v1beta2/ignore-annots
- tests/e2e/v1beta2/scaling
suppress :
- events
7 changes: 7 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/00-install.yaml
@@ -0,0 +1,7 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
apply :
- cluster.yaml
assert :
- ready-cluster.yaml
- ready-sts.yaml
7 changes: 7 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/01-scale-up.yaml
@@ -0,0 +1,7 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
apply :
- cluster-scale-up.yaml
assert :
- ready-cluster-scale-up.yaml
- ready-sts-scale-up.yaml
7 changes: 7 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/02-scale-down.yaml
@@ -0,0 +1,7 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
apply :
- cluster.yaml
assert :
- ready-cluster.yaml
- ready-sts.yaml
47 changes: 47 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/cluster-scale-up.yaml
@@ -0,0 +1,47 @@
apiVersion: redis.redis.opstreelabs.in/v1beta2
kind: RedisCluster
metadata:
name: redis-cluster-v1beta2
spec:
clusterSize: 6
clusterVersion: v7
persistenceEnabled: true
podSecurityContext:
runAsUser: 1000
fsGroup: 1000
kubernetesConfig:
image: quay.io/opstree/redis:latest
imagePullPolicy: Always
resources:
requests:
cpu: 101m
memory: 128Mi
limits:
cpu: 101m
memory: 128Mi
redisExporter:
enabled: true
image: quay.io/opstree/redis-exporter:v1.44.0
imagePullPolicy: Always
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 100m
memory: 128Mi
storage:
volumeClaimTemplate:
spec:
# storageClassName: standard
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 1Gi
nodeConfVolume: true
nodeConfVolumeClaimTemplate:
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 1Gi
47 changes: 47 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/cluster.yaml
@@ -0,0 +1,47 @@
apiVersion: redis.redis.opstreelabs.in/v1beta2
kind: RedisCluster
metadata:
name: redis-cluster-v1beta2
spec:
clusterSize: 3
clusterVersion: v7
persistenceEnabled: true
podSecurityContext:
runAsUser: 1000
fsGroup: 1000
kubernetesConfig:
image: quay.io/opstree/redis:latest
imagePullPolicy: Always
resources:
requests:
cpu: 101m
memory: 128Mi
limits:
cpu: 101m
memory: 128Mi
redisExporter:
enabled: true
image: quay.io/opstree/redis-exporter:v1.44.0
imagePullPolicy: Always
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 100m
memory: 128Mi
storage:
volumeClaimTemplate:
spec:
# storageClassName: standard
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 1Gi
nodeConfVolume: true
nodeConfVolumeClaimTemplate:
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 1Gi
8 changes: 8 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/ready-cluster-scale-up.yaml
@@ -0,0 +1,8 @@
apiVersion: redis.redis.opstreelabs.in/v1beta2
kind: RedisCluster
metadata:
name: redis-cluster-v1beta2
status:
state: Ready
readyLeaderReplicas: 6
readyFollowerReplicas: 6
8 changes: 8 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/ready-cluster.yaml
@@ -0,0 +1,8 @@
apiVersion: redis.redis.opstreelabs.in/v1beta2
kind: RedisCluster
metadata:
name: redis-cluster-v1beta2
status:
state: Ready
readyLeaderReplicas: 3
readyFollowerReplicas: 3
23 changes: 23 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/ready-sts-scale-up.yaml
@@ -0,0 +1,23 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: redis-cluster-v1beta2-leader
labels:
app: redis-cluster-v1beta2-leader
redis_setup_type: cluster
role: leader
status:
replicas: 6
readyReplicas: 6
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: redis-cluster-v1beta2-follower
labels:
app: redis-cluster-v1beta2-follower
redis_setup_type: cluster
role: follower
status:
replicas: 6
readyReplicas: 6