[Fix] : cluster scaling (#704)
* add scale e2e yamls

Signed-off-by: drivebyer <[email protected]>

* temp

Signed-off-by: drivebyer <[email protected]>

* fix the cluster scale

Signed-off-by: drivebyer <[email protected]>

* adjust e2e

Signed-off-by: drivebyer <[email protected]>

---------

Signed-off-by: drivebyer <[email protected]>
Signed-off-by: drivebyer <[email protected]>
drivebyer authored Nov 20, 2023
1 parent 6502c91 commit a597c39
Showing 14 changed files with 245 additions and 50 deletions.
1 change: 1 addition & 0 deletions .github/workflows/e2e.yaml
@@ -18,6 +18,7 @@ jobs:
- ./tests/e2e/v1beta2/setup
- ./tests/e2e/v1beta2/teardown
- ./tests/e2e/v1beta2/ignore-annots
- ./tests/e2e/v1beta2/scaling

steps:
- name: Checkout code
84 changes: 46 additions & 38 deletions controllers/rediscluster_controller.go
@@ -75,27 +75,36 @@ func (r *RedisClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request

// Check if the cluster is downscaled
if leaderReplicas < instance.Status.ReadyLeaderReplicas {

// Imp if the last index of leader sts is not leader make it then
// check whether the redis is leader or not ?
// if not true then make it leader pod

if !(k8sutils.VerifyLeaderPod(ctx, r.K8sClient, r.Log, instance)) {
// lastLeaderPod is slaving right now Make it the master Pod
// We have to bring a manual failover here to make it a leaderPod
// clusterFailover should also include the clusterReplicate since we have to map the followers to new leader
k8sutils.ClusterFailover(ctx, r.K8sClient, r.Log, instance)
reqLogger.Info("Redis cluster is downscaling...", "Ready.ReadyLeaderReplicas", instance.Status.ReadyLeaderReplicas, "Expected.ReadyLeaderReplicas", leaderReplicas)

// loop count times to remove the latest leader/follower pod
count := instance.Status.ReadyLeaderReplicas - leaderReplicas
for i := int32(0); i < count; i++ {
reqLogger.Info("Redis cluster is downscaling", "The times of loop", i)

// Imp if the last index of leader sts is not leader make it then
// check whether the redis is leader or not ?
// if not true then make it leader pod
if !(k8sutils.VerifyLeaderPod(ctx, r.K8sClient, r.Log, instance)) {
// lastLeaderPod is slaving right now Make it the master Pod
// We have to bring a manual failover here to make it a leaderPod
// clusterFailover should also include the clusterReplicate since we have to map the followers to new leader
k8sutils.ClusterFailover(ctx, r.K8sClient, r.Log, instance)
}
// Step 1 Remove the Follower Node
k8sutils.RemoveRedisFollowerNodesFromCluster(ctx, r.K8sClient, r.Log, instance)
// Step 2 Reshard the Cluster
k8sutils.ReshardRedisCluster(r.K8sClient, r.Log, instance, true)
}

// Step 1 Rehard the Cluster
k8sutils.ReshardRedisCluster(r.K8sClient, r.Log, instance)
// Step 2 Remove the Follower Node
k8sutils.RemoveRedisFollowerNodesFromCluster(ctx, r.K8sClient, r.Log, instance)
// Step 3 Remove the Leader Node
k8sutils.RemoveRedisNodeFromCluster(ctx, r.K8sClient, r.Log, instance)
// Step 4 Rebalance the cluster
reqLogger.Info("Redis cluster is downscaled... Rebalancing the cluster")
// Step 3 Rebalance the cluster
k8sutils.RebalanceRedisCluster(r.K8sClient, r.Log, instance)
return ctrl.Result{RequeueAfter: time.Second * 100}, nil
reqLogger.Info("Redis cluster is downscaled... Rebalancing the cluster is done")
err = k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterReady, status.ReadyClusterReason, leaderReplicas, leaderReplicas)
if err != nil {
return ctrl.Result{RequeueAfter: time.Second * 10}, err
}
return ctrl.Result{RequeueAfter: time.Second * 60}, nil
}

// Mark the cluster status as initializing if there are no leader or follower nodes
@@ -130,7 +139,7 @@ func (r *RedisClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request
return ctrl.Result{}, err
}

if int32(redisLeaderInfo.Status.ReadyReplicas) == leaderReplicas {
if redisLeaderInfo.Status.ReadyReplicas == leaderReplicas {

// Mark the cluster status as initializing if there are no follower nodes
if instance.Status.ReadyLeaderReplicas == 0 && instance.Status.ReadyFollowerReplicas == 0 {
@@ -166,26 +175,24 @@ func (r *RedisClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request

if leaderReplicas == 0 {
reqLogger.Info("Redis leaders Cannot be 0", "Ready.Replicas", strconv.Itoa(int(redisLeaderInfo.Status.ReadyReplicas)), "Expected.Replicas", leaderReplicas)
return ctrl.Result{RequeueAfter: time.Second * 120}, nil
return ctrl.Result{RequeueAfter: time.Second * 60}, nil
}

if !(redisLeaderInfo.Status.ReadyReplicas == leaderReplicas && redisFollowerInfo.Status.ReadyReplicas == followerReplicas) {
reqLogger.Info("Redis leader and follower nodes are not ready yet", "Ready.Replicas", strconv.Itoa(int(redisLeaderInfo.Status.ReadyReplicas)), "Expected.Replicas", leaderReplicas)
return ctrl.Result{RequeueAfter: time.Second * 120}, nil
return ctrl.Result{RequeueAfter: time.Second * 60}, nil
}

// Mark the cluster status as bootstrapping if all the leader and follower nodes are ready
if int32(redisLeaderInfo.Status.ReadyReplicas) == leaderReplicas && int32(redisFollowerInfo.Status.ReadyReplicas) == followerReplicas {
if instance.Status.ReadyLeaderReplicas == leaderReplicas && instance.Status.ReadyFollowerReplicas == 0 {
err = k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterBootstrap, status.BootstrapClusterReason, leaderReplicas, followerReplicas)
if err != nil {
return ctrl.Result{RequeueAfter: time.Second * 10}, err
}
if !(instance.Status.ReadyLeaderReplicas == leaderReplicas && instance.Status.ReadyFollowerReplicas == followerReplicas) {
err = k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterBootstrap, status.BootstrapClusterReason, leaderReplicas, followerReplicas)
if err != nil {
return ctrl.Result{RequeueAfter: time.Second * 10}, err
}
}

reqLogger.Info("Creating redis cluster by executing cluster creation commands", "Leaders.Ready", strconv.Itoa(int(redisLeaderInfo.Status.ReadyReplicas)), "Followers.Ready", strconv.Itoa(int(redisFollowerInfo.Status.ReadyReplicas)))
if k8sutils.CheckRedisNodeCount(ctx, r.K8sClient, r.Log, instance, "") != totalReplicas {
if nc := k8sutils.CheckRedisNodeCount(ctx, r.K8sClient, r.Log, instance, ""); nc != totalReplicas {
leaderCount := k8sutils.CheckRedisNodeCount(ctx, r.K8sClient, r.Log, instance, "leader")
if leaderCount != leaderReplicas {
reqLogger.Info("Not all leader are part of the cluster...", "Leaders.Count", leaderCount, "Instance.Size", leaderReplicas)
@@ -208,16 +215,17 @@ func (r *RedisClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request
reqLogger.Info("no follower/replicas configured, skipping replication configuration", "Leaders.Count", leaderCount, "Leader.Size", leaderReplicas, "Follower.Replicas", followerReplicas)
}
}
} else {
reqLogger.Info("Redis leader count is desired")
if int(totalReplicas) > 1 && k8sutils.CheckRedisClusterState(ctx, r.K8sClient, r.Log, instance) >= int(totalReplicas)-1 {
reqLogger.Info("Redis leader is not desired, executing failover operation")
err = k8sutils.ExecuteFailoverOperation(ctx, r.K8sClient, r.Log, instance)
if err != nil {
return ctrl.Result{RequeueAfter: time.Second * 10}, err
}
reqLogger.Info("Redis cluster count is not desired", "Current.Count", nc, "Desired.Count", totalReplicas)
return ctrl.Result{RequeueAfter: time.Second * 60}, nil
}

reqLogger.Info("Redis cluster count is desired")
if int(totalReplicas) > 1 && k8sutils.CheckRedisClusterState(ctx, r.K8sClient, r.Log, instance) >= int(totalReplicas)-1 {
reqLogger.Info("Redis leader is not desired, executing failover operation")
err = k8sutils.ExecuteFailoverOperation(ctx, r.K8sClient, r.Log, instance)
if err != nil {
return ctrl.Result{RequeueAfter: time.Second * 10}, err
}
return ctrl.Result{RequeueAfter: time.Second * 120}, nil
}

// Check If there is No Empty Master Node
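Because the rendered diff above interleaves the removed and added lines, here is a condensed sketch of the downscale branch as it reads after this commit. It is an illustrative outline rather than a compilable excerpt; r, ctx, instance, reqLogger and the k8sutils/status helpers are the same identifiers used in controllers/rediscluster_controller.go above.

// Sketch: downscaling now drains one surplus leader per loop iteration.
if leaderReplicas < instance.Status.ReadyLeaderReplicas {
    reqLogger.Info("Redis cluster is downscaling...", "Ready.ReadyLeaderReplicas", instance.Status.ReadyLeaderReplicas, "Expected.ReadyLeaderReplicas", leaderReplicas)
    count := instance.Status.ReadyLeaderReplicas - leaderReplicas
    for i := int32(0); i < count; i++ {
        // Make sure the highest-index leader pod actually holds the master role before draining it.
        if !k8sutils.VerifyLeaderPod(ctx, r.K8sClient, r.Log, instance) {
            k8sutils.ClusterFailover(ctx, r.K8sClient, r.Log, instance)
        }
        // Detach its follower first, then move its slots away and delete the node (remove = true).
        k8sutils.RemoveRedisFollowerNodesFromCluster(ctx, r.K8sClient, r.Log, instance)
        k8sutils.ReshardRedisCluster(r.K8sClient, r.Log, instance, true)
    }
    // Once the surplus leaders are gone, rebalance and record the new size in the status.
    k8sutils.RebalanceRedisCluster(r.K8sClient, r.Log, instance)
    if err := k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterReady, status.ReadyClusterReason, leaderReplicas, leaderReplicas); err != nil {
        return ctrl.Result{RequeueAfter: time.Second * 10}, err
    }
    return ctrl.Result{RequeueAfter: time.Second * 60}, nil
}

Compared with the previous single pass (reshard, remove follower, remove leader, rebalance), the loop handles exactly one leader per iteration, so a scale-down by more than one leader is processed node by node.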
22 changes: 14 additions & 8 deletions k8sutils/cluster-scaling.go
@@ -11,8 +11,10 @@ import (
"k8s.io/client-go/kubernetes"
)

// Reshard the redis Cluster
func ReshardRedisCluster(client kubernetes.Interface, logger logr.Logger, cr *redisv1beta2.RedisCluster) {
// ReshardRedisCluster transfers the slots from the last node to the first node.
//
// NOTE: when all slots have been transferred, the node becomes a slave of the first master node.
func ReshardRedisCluster(client kubernetes.Interface, logger logr.Logger, cr *redisv1beta2.RedisCluster, remove bool) {
ctx := context.TODO()
var cmd []string
currentRedisCount := CheckRedisNodeCount(ctx, client, logger, cr, "leader")
@@ -72,6 +74,10 @@ func ReshardRedisCluster(client kubernetes.Interface, logger logr.Logger, cr *re
return
}
executeCommand(client, logger, cr, cmd, cr.ObjectMeta.Name+"-leader-0")

if remove {
RemoveRedisNodeFromCluster(ctx, client, logger, cr, removePOD)
}
}

func getRedisClusterSlots(ctx context.Context, client kubernetes.Interface, logger logr.Logger, cr *redisv1beta2.RedisCluster, nodeID string) string {
Expand Down Expand Up @@ -336,18 +342,18 @@ func RemoveRedisFollowerNodesFromCluster(ctx context.Context, client kubernetes.
}

// Remove redis cluster node would remove last node to the existing redis cluster using redis-cli
func RemoveRedisNodeFromCluster(ctx context.Context, client kubernetes.Interface, logger logr.Logger, cr *redisv1beta2.RedisCluster) {
func RemoveRedisNodeFromCluster(ctx context.Context, client kubernetes.Interface, logger logr.Logger, cr *redisv1beta2.RedisCluster, removePod RedisDetails) {
var cmd []string
currentRedisCount := CheckRedisNodeCount(ctx, client, logger, cr, "leader")
//currentRedisCount := CheckRedisNodeCount(ctx, client, logger, cr, "leader")

existingPod := RedisDetails{
PodName: cr.ObjectMeta.Name + "-leader-0",
Namespace: cr.Namespace,
}
removePod := RedisDetails{
PodName: cr.ObjectMeta.Name + "-leader-" + strconv.Itoa(int(currentRedisCount)-1),
Namespace: cr.Namespace,
}
//removePod := RedisDetails{
// PodName: cr.ObjectMeta.Name + "-leader-" + strconv.Itoa(int(currentRedisCount)-1),
// Namespace: cr.Namespace,
//}

cmd = []string{"redis-cli", "--cluster", "del-node"}

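For orientation, the reshard-then-remove sequence that ReshardRedisCluster drives when its new remove argument is true corresponds roughly to the two redis-cli operations below, written as the kind of command slices this file assembles. This is a sketch only: exampleScaleDownCommands is a hypothetical name, the values are placeholders, and the real helpers resolve the leader-0 address, node IDs and slot count at runtime, so the exact flag set should be checked against the function body rather than taken from this example.

// exampleScaleDownCommands shows the rough shape of the two commands (placeholder values only).
func exampleScaleDownCommands() (reshard, delNode []string) {
    // Move all slots from the node being drained to leader-0.
    reshard = []string{
        "redis-cli", "--cluster", "reshard", "<leader-0-ip>:6379",
        "--cluster-from", "<node-id-of-last-leader>",
        "--cluster-to", "<node-id-of-leader-0>",
        "--cluster-slots", "<slots-owned-by-last-leader>",
        "--cluster-yes",
    }
    // With remove set, the now-empty node is deleted, as in RemoveRedisNodeFromCluster above.
    delNode = []string{
        "redis-cli", "--cluster", "del-node", "<leader-0-ip>:6379", "<node-id-of-last-leader>",
    }
    return reshard, delNode
}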
6 changes: 3 additions & 3 deletions k8sutils/redis.go
@@ -406,20 +406,20 @@ func getContainerID(client kubernetes.Interface, logger logr.Logger, cr *redisv1
return -1, nil
}

logger.Info("Pod info retrieved successfully", "Pod Name", podName, "Namespace", cr.Namespace)
logger.V(1).Info("Pod info retrieved successfully", "Pod Name", podName, "Namespace", cr.Namespace)

targetContainer := -1
for containerID, tr := range pod.Spec.Containers {
logger.V(1).Info("Inspecting container", "Pod Name", podName, "Container ID", containerID, "Container Name", tr.Name)
if tr.Name == cr.ObjectMeta.Name+"-leader" {
targetContainer = containerID
logger.Info("Leader container found", "Container ID", containerID, "Container Name", tr.Name)
logger.V(1).Info("Leader container found", "Container ID", containerID, "Container Name", tr.Name)
break
}
}

if targetContainer == -1 {
logger.Info("Leader container not found in pod", "Pod Name", podName)
logger.V(1).Info("Leader container not found in pod", "Pod Name", podName)
return -1, nil
}

3 changes: 2 additions & 1 deletion tests/_config/kuttl-test.yaml
@@ -3,10 +3,11 @@ kind: TestSuite
startKIND: false
kindConfig: "./kind-config.yaml"
parallel: 1
timeout: 300
timeout: 1200
testDirs:
- tests/e2e/v1beta2/setup
- tests/e2e/v1beta2/teardown
- tests/e2e/v1beta2/ignore-annots
- tests/e2e/v1beta2/scaling
suppress :
- events
7 changes: 7 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/00-install.yaml
@@ -0,0 +1,7 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
apply :
- cluster.yaml
assert :
- ready-cluster.yaml
- ready-sts.yaml
7 changes: 7 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/01-scale-up.yaml
@@ -0,0 +1,7 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
apply :
- cluster-scale-up.yaml
assert :
- ready-cluster-scale-up.yaml
- ready-sts-scale-up.yaml
7 changes: 7 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/02-scale-down.yaml
@@ -0,0 +1,7 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
apply :
- cluster.yaml
assert :
- ready-cluster.yaml
- ready-sts.yaml
47 changes: 47 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/cluster-scale-up.yaml
@@ -0,0 +1,47 @@
apiVersion: redis.redis.opstreelabs.in/v1beta2
kind: RedisCluster
metadata:
name: redis-cluster-v1beta2
spec:
clusterSize: 6
clusterVersion: v7
persistenceEnabled: true
podSecurityContext:
runAsUser: 1000
fsGroup: 1000
kubernetesConfig:
image: quay.io/opstree/redis:latest
imagePullPolicy: Always
resources:
requests:
cpu: 101m
memory: 128Mi
limits:
cpu: 101m
memory: 128Mi
redisExporter:
enabled: true
image: quay.io/opstree/redis-exporter:v1.44.0
imagePullPolicy: Always
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 100m
memory: 128Mi
storage:
volumeClaimTemplate:
spec:
# storageClassName: standard
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 1Gi
nodeConfVolume: true
nodeConfVolumeClaimTemplate:
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 1Gi
47 changes: 47 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/cluster.yaml
@@ -0,0 +1,47 @@
apiVersion: redis.redis.opstreelabs.in/v1beta2
kind: RedisCluster
metadata:
name: redis-cluster-v1beta2
spec:
clusterSize: 3
clusterVersion: v7
persistenceEnabled: true
podSecurityContext:
runAsUser: 1000
fsGroup: 1000
kubernetesConfig:
image: quay.io/opstree/redis:latest
imagePullPolicy: Always
resources:
requests:
cpu: 101m
memory: 128Mi
limits:
cpu: 101m
memory: 128Mi
redisExporter:
enabled: true
image: quay.io/opstree/redis-exporter:v1.44.0
imagePullPolicy: Always
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 100m
memory: 128Mi
storage:
volumeClaimTemplate:
spec:
# storageClassName: standard
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 1Gi
nodeConfVolume: true
nodeConfVolumeClaimTemplate:
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 1Gi
8 changes: 8 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/ready-cluster-scale-up.yaml
@@ -0,0 +1,8 @@
apiVersion: redis.redis.opstreelabs.in/v1beta2
kind: RedisCluster
metadata:
name: redis-cluster-v1beta2
status:
state: Ready
readyLeaderReplicas: 6
readyFollowerReplicas: 6
8 changes: 8 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/ready-cluster.yaml
@@ -0,0 +1,8 @@
apiVersion: redis.redis.opstreelabs.in/v1beta2
kind: RedisCluster
metadata:
name: redis-cluster-v1beta2
status:
state: Ready
readyLeaderReplicas: 3
readyFollowerReplicas: 3
23 changes: 23 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/ready-sts-scale-up.yaml
@@ -0,0 +1,23 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: redis-cluster-v1beta2-leader
labels:
app: redis-cluster-v1beta2-leader
redis_setup_type: cluster
role: leader
status:
replicas: 6
readyReplicas: 6
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: redis-cluster-v1beta2-follower
labels:
app: redis-cluster-v1beta2-follower
redis_setup_type: cluster
role: follower
status:
replicas: 6
readyReplicas: 6
