Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
Signed-off-by: Xuhui zhang <[email protected]>
  • Loading branch information
zxh326 committed Dec 5, 2024
1 parent 0e483cd commit 2b67114
Show file tree
Hide file tree
Showing 8 changed files with 62 additions and 9 deletions.
8 changes: 8 additions & 0 deletions api/v1/cachegroup_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,14 @@ type CacheGroupSpec struct {
CleanCache bool `json:"cleanCache,omitempty"`
CacheGroup string `json:"cacheGroup,omitempty"`
Worker CacheGroupWorkerSpec `json:"worker,omitempty"`
// Duration for new node to join cluster with group-backup option
// Default is 10 minutes
// +optional
BackupDuration *metav1.Duration `json:"backupDuration,omitempty"`
// Maximum time to wait for data migration when deleting
// Default is 1 hour
// +optional
WaitingDeletedMaxDuration *metav1.Duration `json:"waitingDeletedMaxDuration,omitempty"`
}

type CacheGroupPhase string
Expand Down
11 changes: 11 additions & 0 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions config/crd/bases/juicefs.io_cachegroups.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ spec:
type: object
spec:
properties:
backupDuration:
type: string
cacheGroup:
type: string
cleanCache:
Expand Down Expand Up @@ -78,6 +80,8 @@ spec:
type:
type: string
type: object
waitingDeletedMaxDuration:
type: string
worker:
properties:
overwrite:
Expand Down
4 changes: 2 additions & 2 deletions config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
images:
- name: controller
newName: registry.zzde.me/juicefs-cache-group-operator
newTag: v0.2.5
newName: controller
newTag: latest
4 changes: 4 additions & 0 deletions dist/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ spec:
type: object
spec:
properties:
backupDuration:
type: string
cacheGroup:
type: string
cleanCache:
Expand Down Expand Up @@ -77,6 +79,8 @@ spec:
type:
type: string
type: object
waitingDeletedMaxDuration:
type: string
worker:
properties:
overwrite:
Expand Down
20 changes: 14 additions & 6 deletions internal/controller/cachegroup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ func (r *CacheGroupReconciler) removeRedundantWorkers(
}

if cg.Status.ReadyWorker > 1 {
delete, err := r.gracefulShutdownWorker(ctx, &worker)
delete, err := r.gracefulShutdownWorker(ctx, cg, &worker)
if err != nil {
log.Error(err, "failed to graceful shutdown worker", "worker", worker)
return err
Expand All @@ -350,11 +350,13 @@ func (r *CacheGroupReconciler) removeRedundantWorkers(
}

// change pod options `group-weight` to zero, delete and recreate the worker pod
func (r *CacheGroupReconciler) gracefulShutdownWorker(ctx context.Context, worker *corev1.Pod) (delete bool, err error) {
func (r *CacheGroupReconciler) gracefulShutdownWorker(
ctx context.Context,
cg *juicefsiov1.CacheGroup,
worker *corev1.Pod) (delete bool, err error) {
log := log.FromContext(ctx)
isReady := utils.IsPodReady(*worker) && utils.IsMountPointReady(ctx, *worker, common.MountPoint)
if !isReady {
// TODO: add a timeout for data migration.
if _, ok := worker.Annotations[common.AnnoWaitingDeleteWorker]; ok {
return false, nil
}
Expand All @@ -372,7 +374,13 @@ func (r *CacheGroupReconciler) gracefulShutdownWorker(ctx context.Context, worke
return true, nil
}

if _, ok := worker.Annotations[common.AnnoWaitingDeleteWorker]; ok {
if v, ok := worker.Annotations[common.AnnoWaitingDeleteWorker]; ok {
waitingAt := utils.MustParseTime(v)
if time.Since(waitingAt) > utils.GetWaitingDeletedMaxDuration(cg.Spec.WaitingDeletedMaxDuration) {
log.Info("redundant worker still has cache blocks, waiting for data migration timeout, delete it", "worker", worker.Name)
return true, nil
}
// already set group-weight to 0
return false, nil
}
log.V(1).Info("redundant worker has cache blocks, recreate to set group-weight to 0", "worker", worker.Name, "cacheBytes", cacheBytes)
Expand All @@ -398,11 +406,11 @@ func (r *CacheGroupReconciler) shouldAddGroupBackupOrNot(cg *juicefsiov1.CacheGr
if actual == nil {
return cg.Status.ReadyWorker >= 1
}
// If this node has been added group-backup for 10 minutes
// If this node has been added group-backup for x(default 10m) minutes
// then this node is a normal worker.
if v, ok := actual.Annotations[common.AnnoBackupWorker]; ok {
backupAt := utils.MustParseTime(v)
return time.Since(backupAt) < common.BackupWorkerDuration
return time.Since(backupAt) < utils.GetBackupWorkerDuration(cg.Spec.BackupDuration)
}
return false
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ var (
},
}

BackupWorkerDuration = 10 * time.Minute
DefaultBackupWorkerDuration = 10 * time.Minute
DefaultWaitingMaxDuration = 1 * time.Hour
)

func GenWorkerName(cgName string, nodeName string) string {
Expand Down
17 changes: 17 additions & 0 deletions pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ import (
"strconv"
"strings"
"time"

"github.com/juicedata/juicefs-cache-group-operator/pkg/common"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func ToPtr[T any](v T) *T {
Expand Down Expand Up @@ -94,3 +97,17 @@ func CompareEEImageVersion(image, target string) int {

return 0
}

func GetWaitingDeletedMaxDuration(d *metav1.Duration) time.Duration {
if d == nil {
return common.DefaultWaitingMaxDuration
}
return d.Duration
}

func GetBackupWorkerDuration(d *metav1.Duration) time.Duration {
if d == nil {
return common.DefaultBackupWorkerDuration
}
return d.Duration
}

0 comments on commit 2b67114

Please sign in to comment.